From 74c2ad7fa209e39994d9757d1451563992d538db Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 29 Nov 2023 18:08:33 -0500 Subject: [PATCH 01/25] Reduce unnecessary allocations and reuse code --- Project.toml | 7 +-- src/NonlinearSolve.jl | 113 +++++++++++++++++++++--------------------- src/jacobian.jl | 96 +++++++++++++++++++---------------- src/klement.jl | 2 +- src/levenberg.jl | 2 +- src/raphson.jl | 57 ++++++--------------- src/trace.jl | 6 ++- src/trustRegion.jl | 2 +- src/utils.jl | 92 +++++++++++++++++++++++++--------- 9 files changed, 204 insertions(+), 173 deletions(-) diff --git a/Project.toml b/Project.toml index 60764651b..8a42f9d21 100644 --- a/Project.toml +++ b/Project.toml @@ -16,6 +16,7 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +MaybeInplace = "bb5d69b7-63fc-4a16-80bd-7e42200c7bdb" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" @@ -42,8 +43,8 @@ NonlinearSolveZygoteExt = "Zygote" [compat] ADTypes = "0.2" -ArrayInterface = "6.0.24, 7" Aqua = "0.8" +ArrayInterface = "6.0.24, 7" BandedMatrices = "1" BenchmarkTools = "1" ConcreteStructs = "0.2" @@ -70,9 +71,9 @@ Reexport = "0.2, 1" SafeTestsets = "0.1" SciMLBase = "2.9" SciMLOperators = "0.3" -SimpleNonlinearSolve = "0.1.23" +SimpleNonlinearSolve = "1" # FIXME: Don't update the version in this PR. Using it to test SparseArrays = "<0.0.1, 1" -SparseDiffTools = "2.12" +SparseDiffTools = "2.14" StaticArrays = "1" StaticArraysCore = "1.4" Symbolics = "5" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index c591eb4ee..f050bf007 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -8,25 +8,24 @@ import Reexport: @reexport import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_workload @recompile_invalidations begin - using DiffEqBase, - LazyArrays, LinearAlgebra, LinearSolve, Printf, SparseArrays, + using DiffEqBase, LazyArrays, LinearAlgebra, LinearSolve, Printf, SparseArrays, SparseDiffTools - using FastBroadcast: @.. - import ArrayInterface: restructure import ADTypes: AbstractFiniteDifferencesMode - import ArrayInterface: undefmatrix, + import ArrayInterface: undefmatrix, restructure, can_setindex, matrix_colors, parameterless_type, ismutable, issingular, fast_scalar_indexing import ConcreteStructs: @concrete import EnumX: @enumx + import FastBroadcast: @.. import ForwardDiff import ForwardDiff: Dual import LinearSolve: ComposePreconditioner, InvPreconditioner, needs_concrete_A + import MaybeInplace: @bb import RecursiveArrayTools: ArrayPartition, AbstractVectorOfArray, recursivecopy!, recursivefill! 
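# NOTE (illustrative sketch, not part of this patch): this commit adds MaybeInplace.jl
# and its `@bb` ("bang-bang") macro, which the rest of the PR uses to collapse the
# separate in-place / out-of-place solver kernels into a single code path. Judging by
# how it is used below, `@bb expr` mutates the destination when it supports
# `setindex!` and falls back to an out-of-place rebind (e.g. for StaticArrays)
# otherwise. A minimal sketch; `damped_update` is an illustrative name, not package API:

using MaybeInplace, StaticArrays

function damped_update(u, du, α)
    @bb @. u = u - α * du   # in-place broadcast for `Vector`, rebinds `u` for `SVector`
    return u
end

damped_update([1.0, 2.0], [0.1, 0.2], 0.5)      # mutates the first argument
damped_update(SA[1.0, 2.0], SA[0.1, 0.2], 0.5)  # returns a new SVector instead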
import SciMLBase: AbstractNonlinearAlgorithm, NLStats, _unwrap_val, has_jac, isinplace import SciMLOperators: FunctionOperator - import StaticArraysCore: StaticArray, SVector, SArray, MArray + import StaticArraysCore: StaticArray, SVector, SArray, MArray, Size, SMatrix import UnPack: @unpack using ADTypes, LineSearches, SciMLBase, SimpleNonlinearSolve @@ -55,13 +54,13 @@ isinplace(::AbstractNonlinearSolveCache{iip}) where {iip} = iip function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm) str = "$(nameof(typeof(alg)))(" modifiers = String[] - if _getproperty(alg, Val(:ad)) !== nothing + if __getproperty(alg, Val(:ad)) !== nothing push!(modifiers, "ad = $(nameof(typeof(alg.ad)))()") end - if _getproperty(alg, Val(:linsolve)) !== nothing + if __getproperty(alg, Val(:linsolve)) !== nothing push!(modifiers, "linsolve = $(nameof(typeof(alg.linsolve)))()") end - if _getproperty(alg, Val(:linesearch)) !== nothing + if __getproperty(alg, Val(:linesearch)) !== nothing ls = alg.linesearch if ls isa LineSearch ls.method !== nothing && @@ -70,7 +69,7 @@ function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm) push!(modifiers, "linesearch = $(nameof(typeof(alg.linesearch)))()") end end - if _getproperty(alg, Val(:radius_update_scheme)) !== nothing + if __getproperty(alg, Val(:radius_update_scheme)) !== nothing push!(modifiers, "radius_update_scheme = $(alg.radius_update_scheme)") end str = str * join(modifiers, ", ") @@ -107,7 +106,7 @@ function SciMLBase.solve!(cache::AbstractNonlinearSolveCache) end end - trace = _getproperty(cache, Val{:trace}()) + trace = __getproperty(cache, Val{:trace}()) if trace !== nothing update_trace!(trace, cache.stats.nsteps, get_u(cache), get_fu(cache), nothing, nothing, nothing; last = Val(true)) @@ -134,52 +133,52 @@ include("jacobian.jl") include("ad.jl") include("default.jl") -@setup_workload begin - nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), - (NonlinearFunction{false}((u, p) -> u .* u .- p), [0.1]), - (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) - probs_nls = NonlinearProblem[] - for T in (Float32, Float64), (fn, u0) in nlfuncs - push!(probs_nls, NonlinearProblem(fn, T.(u0), T(2))) - end - - nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - GeneralBroyden(), GeneralKlement(), DFSane(), nothing) - - probs_nlls = NonlinearLeastSquaresProblem[] - nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), - (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), - (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, - resid_prototype = zeros(1)), [0.1, 0.0]), - (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), - resid_prototype = zeros(4)), [0.1, 0.1])) - for (fn, u0) in nlfuncs - push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) - end - nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), Float32[0.1, 0.0]), - (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), - Float32[0.1, 0.1]), - (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, - resid_prototype = zeros(Float32, 1)), Float32[0.1, 0.0]), - (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), - resid_prototype = zeros(Float32, 4)), Float32[0.1, 0.1])) - for (fn, u0) in nlfuncs - push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0f0)) - end - - nlls_algs = (LevenbergMarquardt(), GaussNewton(), - LevenbergMarquardt(; linsolve = LUFactorization()), 
- GaussNewton(; linsolve = LUFactorization())) - - @compile_workload begin - for prob in probs_nls, alg in nls_algs - solve(prob, alg, abstol = 1e-2) - end - for prob in probs_nlls, alg in nlls_algs - solve(prob, alg, abstol = 1e-2) - end - end -end +# @setup_workload begin +# nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), +# (NonlinearFunction{false}((u, p) -> u .* u .- p), [0.1]), +# (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) +# probs_nls = NonlinearProblem[] +# for T in (Float32, Float64), (fn, u0) in nlfuncs +# push!(probs_nls, NonlinearProblem(fn, T.(u0), T(2))) +# end + +# nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), +# GeneralBroyden(), GeneralKlement(), DFSane(), nothing) + +# probs_nlls = NonlinearLeastSquaresProblem[] +# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), +# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), +# (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, +# resid_prototype = zeros(1)), [0.1, 0.0]), +# (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), +# resid_prototype = zeros(4)), [0.1, 0.1])) +# for (fn, u0) in nlfuncs +# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) +# end +# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), Float32[0.1, 0.0]), +# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), +# Float32[0.1, 0.1]), +# (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, +# resid_prototype = zeros(Float32, 1)), Float32[0.1, 0.0]), +# (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), +# resid_prototype = zeros(Float32, 4)), Float32[0.1, 0.1])) +# for (fn, u0) in nlfuncs +# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0f0)) +# end + +# nlls_algs = (LevenbergMarquardt(), GaussNewton(), +# LevenbergMarquardt(; linsolve = LUFactorization()), +# GaussNewton(; linsolve = LUFactorization())) + +# @compile_workload begin +# for prob in probs_nls, alg in nls_algs +# solve(prob, alg, abstol = 1e-2) +# end +# for prob in probs_nlls, alg in nlls_algs +# solve(prob, alg, abstol = 1e-2) +# end +# end +# end export RadiusUpdateSchemes diff --git a/src/jacobian.jl b/src/jacobian.jl index 41c7319a1..54f1c0f0e 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -3,8 +3,11 @@ Jᵀ end -SciMLBase.isinplace(JᵀJ::KrylovJᵀJ) = isinplace(JᵀJ.Jᵀ) +__maybe_symmetric(x::KrylovJᵀJ) = x.JᵀJ + +isinplace(JᵀJ::KrylovJᵀJ) = isinplace(JᵀJ.Jᵀ) +# Select if we are going to use sparse differentiation or not sparsity_detection_alg(_, _) = NoSparsityDetection() function sparsity_detection_alg(f, ad::AbstractSparseADType) if f.sparsity === nothing @@ -33,13 +36,21 @@ function jacobian!!(J::Union{AbstractMatrix{<:Number}, Nothing}, cache) @unpack f, uf, u, p, jac_cache, alg, fu2 = cache iip = isinplace(cache) if iip - has_jac(f) ? f.jac(J, u, p) : - sparse_jacobian!(J, alg.ad, jac_cache, uf, fu2, _maybe_mutable(u, alg.ad)) + if has_jac(f) + f.jac(J, u, p) + else + sparse_jacobian!(J, alg.ad, jac_cache, uf, fu2, u) + end + return J else - return has_jac(f) ? 
f.jac(u, p) : - sparse_jacobian!(J, alg.ad, jac_cache, uf, _maybe_mutable(u, alg.ad)) + if has_jac(f) + return f.jac(u, p) + elseif can_setindex(typeof(J)) + return sparse_jacobian!(J, alg.ad, jac_cache, uf, u) + else + return sparse_jacobian(alg.ad, jac_cache, uf, u) + end end - return J end # Scalar case jacobian!!(::Number, cache) = last(value_derivative(cache.uf, cache.u)) @@ -59,13 +70,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val alg_wants_jac = (concrete_jac(alg) !== nothing && concrete_jac(alg)) # NOTE: The deepcopy is needed here since we are using the resid_prototype elsewhere - fu = f.resid_prototype === nothing ? (iip ? _mutable_zero(u) : _mutable(f(u, p))) : + fu = f.resid_prototype === nothing ? (iip ? zero(u) : f(u, p)) : (iip ? deepcopy(f.resid_prototype) : f.resid_prototype) if !has_analytic_jac && (linsolve_needs_jac || alg_wants_jac) sd = sparsity_detection_alg(f, alg.ad) ad = alg.ad - jac_cache = iip ? sparse_jacobian_cache(ad, sd, uf, fu, _maybe_mutable(u, ad)) : - sparse_jacobian_cache(ad, sd, uf, _maybe_mutable(u, ad); fx = fu) + jac_cache = iip ? sparse_jacobian_cache(ad, sd, uf, fu, u) : + sparse_jacobian_cache(ad, sd, uf, __maybe_mutable(u, ad); fx = fu) else jac_cache = nothing end @@ -76,11 +87,11 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val JacVec(uf, u; fu, autodiff = __get_nonsparse_ad(alg.ad)) else if iip - jvp = (_, u, v) -> (du = similar(fu); f.jvp(du, v, u, p); du) - jvp! = (du, _, u, v) -> f.jvp(du, v, u, p) + jvp = (_, u, v) -> (du_ = similar(fu); f.jvp(du_, v, u, p); du_) + jvp! = (du_, _, u, v) -> f.jvp(du_, v, u, p) else jvp = (_, u, v) -> f.jvp(v, u, p) - jvp! = (du, _, u, v) -> (du .= f.jvp(v, u, p)) + jvp! = (du_, _, u, v) -> (du_ .= f.jvp(v, u, p)) end op = SparseDiffTools.FwdModeAutoDiffVecProd(f, u, (), jvp, jvp!) FunctionOperator(op, u, fu; isinplace = Val(true), outofplace = Val(false), @@ -89,16 +100,18 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val else if has_analytic_jac f.jac_prototype === nothing ? undefmatrix(u) : f.jac_prototype + elseif f.jac_prototype === nothing + init_jacobian(jac_cache; preserve_immutable = Val(true)) else - f.jac_prototype === nothing ? init_jacobian(jac_cache) : f.jac_prototype + f.jac_prototype end end - du = _mutable_zero(u) + du = copy(u) if needsJᵀJ JᵀJ, Jᵀfu = __init_JᵀJ(J, _vec(fu), uf, u; f, - vjp_autodiff = __get_nonsparse_ad(_getproperty(alg, Val(:vjp_autodiff))), + vjp_autodiff = __get_nonsparse_ad(__getproperty(alg, Val(:vjp_autodiff))), jvp_autodiff = __get_nonsparse_ad(alg.ad)) end @@ -106,7 +119,8 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val linprob_A = alg isa PseudoTransient ? (J - (1 / (convert(eltype(u), alg.alpha_initial))) * I) : (needsJᵀJ ? __maybe_symmetric(JᵀJ) : J) - linsolve = __setup_linsolve(linprob_A, needsJᵀJ ? Jᵀfu : fu, du, p, alg) + linsolve = linsolve_caches(linprob_A, needsJᵀJ ? Jᵀfu : fu, du, p, alg; + linsolve_kwargs) else linsolve = nothing end @@ -115,22 +129,33 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val return uf, linsolve, J, fu, jac_cache, du end -function __setup_linsolve(A, b, u, p, alg) - linprob = LinearProblem(A, _vec(b); u0 = _vec(u)) +## Special Handling for Scalars +function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u::Number, p, + ::Val{false}; linsolve_with_JᵀJ::Val{needsJᵀJ} = Val(false), + kwargs...) 
where {needsJᵀJ, F} + # NOTE: Scalar `u` assumes scalar output from `f` + uf = SciMLBase.JacobianWrapper{false}(f, p) + needsJᵀJ && return uf, nothing, u, nothing, nothing, u, u, u + return uf, FakeLinearSolveJLCache(u, u), u, nothing, nothing, u +end - weight = similar(u) - recursivefill!(weight, true) +# Linear Solve Cache +function linsolve_caches(A, b, u, p, alg; linsolve_kwargs = (;)) + if alg.linsolve === nothing && A isa SMatrix && linsolve_kwargs === (;) + # Default handling for SArrays in LinearSolve is not great. Some parts are patched + # but there are quite a few unnecessary allocations + return FakeLinearSolveJLCache(A, b) + end + + linprob = LinearProblem(A, _vec(b); u0 = _vec(u), linsolve_kwargs...) + + weight = __init_ones(u) Pl, Pr = wrapprecs(alg.precs(A, nothing, u, p, nothing, nothing, nothing, nothing, nothing)..., weight) return init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr) end -__setup_linsolve(A::KrylovJᵀJ, b, u, p, alg) = __setup_linsolve(A.JᵀJ, b, u, p, alg) - -__get_nonsparse_ad(::AutoSparseForwardDiff) = AutoForwardDiff() -__get_nonsparse_ad(::AutoSparseFiniteDiff) = AutoFiniteDiff() -__get_nonsparse_ad(::AutoSparseZygote) = AutoZygote() -__get_nonsparse_ad(ad) = ad +linsolve_caches(A::KrylovJᵀJ, b, u, p, alg) = linsolve_caches(A.JᵀJ, b, u, p, alg) __init_JᵀJ(J::Number, args...; kwargs...) = zero(J), zero(J) function __init_JᵀJ(J::AbstractArray, fu, args...; kwargs...) @@ -180,24 +205,7 @@ function __concrete_vjp_autodiff(vjp_autodiff, jvp_autodiff, uf) end end -__maybe_symmetric(x) = Symmetric(x) -__maybe_symmetric(x::Number) = x -# LinearSolve with `nothing` doesn't dispatch correctly here -__maybe_symmetric(x::StaticArray) = x -__maybe_symmetric(x::SparseArrays.AbstractSparseMatrix) = x -__maybe_symmetric(x::SciMLOperators.AbstractSciMLOperator) = x -__maybe_symmetric(x::KrylovJᵀJ) = x.JᵀJ - -## Special Handling for Scalars -function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u::Number, p, - ::Val{false}; linsolve_with_JᵀJ::Val{needsJᵀJ} = Val(false), - kwargs...) where {needsJᵀJ, F} - # NOTE: Scalar `u` assumes scalar output from `f` - uf = SciMLBase.JacobianWrapper{false}(f, p) - needsJᵀJ && return uf, nothing, u, nothing, nothing, u, u, u - return uf, nothing, u, nothing, nothing, u -end - +# Generic Handling of Krylov Methods for Normal Form Linear Solves function __update_JᵀJ!(iip::Val, cache, sym::Symbol, J) return __update_JᵀJ!(iip, cache, sym, getproperty(cache, sym), J) end diff --git a/src/klement.jl b/src/klement.jl index ec32dc6b8..8a9640fd4 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -87,7 +87,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme linsolve_alg = alg_.linsolve === nothing && u isa Array ? 
LUFactorization() : nothing alg = set_linsolve(alg_, linsolve_alg) - linsolve = __setup_linsolve(J, _vec(fu), _vec(du), p, alg) + linsolve = linsolve_caches(J, _vec(fu), _vec(du), p, alg) end abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, diff --git a/src/levenberg.jl b/src/levenberg.jl index dcc07d85e..94e882223 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -232,7 +232,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, fill!(mat_tmp, zero(eltype(u))) rhs_tmp = vcat(_vec(fu1), _vec(u)) fill!(rhs_tmp, zero(eltype(u))) - linsolve = __setup_linsolve(mat_tmp, rhs_tmp, u, p, alg) + linsolve = linsolve_caches(mat_tmp, rhs_tmp, u, p, alg) end return LevenbergMarquardtCache{iip, !_unwrap_val(linsolve_with_JᵀJ)}(f, alg, u, copy(u), diff --git a/src/raphson.jl b/src/raphson.jl index 594b893e5..4c4125579 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -80,7 +80,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso kwargs...) where {uType, iip} alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) + u = __maybe_unaliased(u0, alias_u0) fu1 = evaluate_f(prob, u) uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) @@ -91,62 +91,37 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso ls_cache = init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)) trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) - return NewtonRaphsonCache{iip}(f, alg, u, copy(u), fu1, fu2, du, p, uf, linsolve, J, + @bb u_prev = copy(u) + + return NewtonRaphsonCache{iip}(f, alg, u, u_prev, fu1, fu2, du, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), ls_cache, tc_cache, trace) end -function perform_step!(cache::NewtonRaphsonCache{true}) - @unpack u, u_prev, fu1, f, p, alg, J, linsolve, du = cache - jacobian!!(J, cache) +function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} + @unpack alg = cache + + cache.J = jacobian!!(cache.J, cache) # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve; A = J, b = _vec(fu1), linu = _vec(du), - p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu1), + linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - # Line Search - α = perform_linesearch!(cache.ls_cache, u, du) - _axpy!(-α, du, u) - f(cache.fu1, u, p) - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), J, - cache.du, α) - - check_and_update!(cache, cache.fu1, cache.u, cache.u_prev) - - @. u_prev = u - cache.stats.nf += 1 - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end - -function perform_step!(cache::NewtonRaphsonCache{false}) - @unpack u, u_prev, fu1, f, p, alg, linsolve = cache - - cache.J = jacobian!!(cache.J, cache) - # u = u - J \ fu - if linsolve === nothing - cache.du = fu1 / cache.J - else - linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), - linu = _vec(cache.du), p, reltol = cache.abstol) - cache.linsolve = linres.cache - end + !iip && (cache.du = linres.u) # Line Search - α = perform_linesearch!(cache.ls_cache, u, cache.du) - cache.u = @. 
u - α * cache.du # `u` might not support mutation - cache.fu1 = f(cache.u, p) + α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) + @bb axpy!(-α, cache.du, cache.u) + + evaluate_f(cache, cache.u) update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, cache.du, α) check_and_update!(cache, cache.fu1, cache.u, cache.u_prev) - cache.u_prev = cache.u + @bb copyto!(cache.u_prev, cache.u) cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 diff --git a/src/trace.jl b/src/trace.jl index c458c7d07..e89efe956 100644 --- a/src/trace.jl +++ b/src/trace.jl @@ -151,8 +151,10 @@ function reset!(trace::NonlinearSolveTrace) end function Base.show(io::IO, trace::NonlinearSolveTrace) - for entry in trace.history - show(io, entry) + if trace.history !== nothing + foreach(entry -> show(io, entry), trace.history) + else + print(io, "Tracing Disabled") end return nothing end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 8b4041b75..5493aa4d7 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -248,7 +248,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, uf, _, J, fu2, jac_cache, du, H, g = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(true), lininit = Val(false)) g = _restructure(fu1, g) - linsolve = u isa Number ? nothing : __setup_linsolve(J, fu2, du, p, alg) + linsolve = u isa Number ? nothing : linsolve_caches(J, fu2, du, p, alg) u_tmp = zero(u) u_cauchy = zero(u) diff --git a/src/utils.jl b/src/utils.jl index bf6d1152f..d3017d42f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,5 +1,15 @@ const DEFAULT_NORM = DiffEqBase.NONLINEARSOLVE_DEFAULT_NORM +@concrete mutable struct FakeLinearSolveJLCache + A + b +end + +@concrete struct FakeLinearSolveJLResult + cache + u +end + # Ignores NaN function __findmin(f, x) return findmin(x) do xᵢ @@ -55,7 +65,7 @@ function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing, end """ -value_derivative(f, x) + value_derivative(f, x) Compute `f(x), d/dx f(x)` in the most efficient way. 
""" @@ -65,10 +75,6 @@ function value_derivative(f::F, x::R) where {F, R} ForwardDiff.value(out), ForwardDiff.extract_derivative(T, out) end -function value_derivative(f::F, x::SVector) where {F} - f(x), ForwardDiff.jacobian(f, x) -end - @inline value(x) = x @inline value(x::Dual) = ForwardDiff.value(x) @inline value(x::AbstractArray{<:Dual}) = map(ForwardDiff.value, x) @@ -82,6 +88,15 @@ end DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing +function dolinsolve(precs::P, linsolve::FakeLinearSolveJLCache; A = nothing, + linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, + cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} + A !== nothing && (linsolve.A = A) + b !== nothing && (linsolve.b = b) + linres = linsolve.A \ linsolve.b + return FakeLinearSolveJLResult(linsolve, linres) +end + function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} @@ -155,33 +170,32 @@ _mutable_zero(x::SArray) = MArray(x) _mutable(x) = x _mutable(x::SArray) = MArray(x) -_maybe_mutable(x, ::AbstractFiniteDifferencesMode) = _mutable(x) +# __maybe_mutable(x, ::AbstractFiniteDifferencesMode) = _mutable(x) # The shadow allocated for Enzyme needs to be mutable -_maybe_mutable(x, ::AutoSparseEnzyme) = _mutable(x) -_maybe_mutable(x, _) = x +__maybe_mutable(x, ::AutoSparseEnzyme) = _mutable(x) +__maybe_mutable(x, _) = x # Helper function to get value of `f(u, p)` function evaluate_f(prob::Union{NonlinearProblem{uType, iip}, NonlinearLeastSquaresProblem{uType, iip}}, u) where {uType, iip} @unpack f, u0, p = prob if iip - fu = f.resid_prototype === nothing ? zero(u) : f.resid_prototype + fu = f.resid_prototype === nothing ? similar(u) : f.resid_prototype f(fu, u, p) else - fu = _mutable(f(u, p)) + fu = f(u, p) end return fu end -evaluate_f(cache, u; fu = nothing) = evaluate_f(cache.f, u, cache.p, Val(cache.iip); fu) - -function evaluate_f(f, u, p, ::Val{iip}; fu = nothing) where {iip} - if iip - f(fu, u, p) - return fu +function evaluate_f(cache, u) + @unpack f, p = cache.prob + if isinplace(cache) + f(get_fu(cache), u, p) else - return f(u, p) + set_fu!(cache, f(u, p)) end + return nothing end """ @@ -206,7 +220,7 @@ end function __get_concrete_algorithm(alg, prob) @unpack sparsity, jac_prototype = prob.f use_sparse_ad = sparsity !== nothing || jac_prototype !== nothing - ad = if eltype(prob.u0) <: Complex + ad = if !ForwardDiff.can_dual(eltype(prob.u0)) # Use Finite Differencing use_sparse_ad ? 
AutoSparseFiniteDiff() : AutoFiniteDiff() else @@ -310,16 +324,16 @@ function __init_low_rank_jacobian(u, fu, threshold::Int) end # Check Singular Matrix -_issingular(x::Number) = iszero(x) -@generated function _issingular(x::T) where {T} +@inline _issingular(x::Number) = iszero(x) +@inline @generated function _issingular(x::T) where {T} hasmethod(issingular, Tuple{T}) && return :(issingular(x)) return :(__issingular(x)) end -__issingular(x::AbstractMatrix{T}) where {T} = cond(x) > inv(sqrt(eps(real(T)))) -__issingular(x) = false ## If SciMLOperator and such +@inline __issingular(x::AbstractMatrix{T}) where {T} = cond(x) > inv(sqrt(eps(real(T)))) +@inline __issingular(x) = false ## If SciMLOperator and such # Safe getproperty -@generated function _getproperty(s::S, ::Val{X}) where {S, X} +@generated function __getproperty(s::S, ::Val{X}) where {S, X} hasfield(S, X) && return :(s.$X) return :(nothing) end @@ -348,6 +362,7 @@ _try_factorize_and_check_singular!(::Nothing, x) = _issingular(x), false return :(@. y += α * x) end +# Non-square matrix @inline _needs_square_A(_, ::Number) = true @inline _needs_square_A(_, ::StaticArray) = true @inline _needs_square_A(alg, _) = LinearSolve.needs_square_A(alg.linsolve) @@ -355,9 +370,40 @@ end # Define special concatenation for certain Array combinations @inline _vcat(x, y) = vcat(x, y) +# LazyArrays for tracing __zero(x::AbstractArray) = zero(x) __zero(x) = x LazyArrays.applied_eltype(::typeof(__zero), x) = eltype(x) LazyArrays.applied_ndims(::typeof(__zero), x) = ndims(x) LazyArrays.applied_size(::typeof(__zero), x) = size(x) LazyArrays.applied_axes(::typeof(__zero), x) = axes(x) + +# SparseAD --> NonSparseAD +@inline __get_nonsparse_ad(::AutoSparseForwardDiff) = AutoForwardDiff() +@inline __get_nonsparse_ad(::AutoSparseFiniteDiff) = AutoFiniteDiff() +@inline __get_nonsparse_ad(::AutoSparseZygote) = AutoZygote() +@inline __get_nonsparse_ad(ad) = ad + +# Use Symmetric Matrices if known to be efficient +@inline __maybe_symmetric(x) = Symmetric(x) +@inline __maybe_symmetric(x::Number) = x +## LinearSolve with `nothing` doesn't dispatch correctly here +@inline __maybe_symmetric(x::StaticArray) = x +@inline __maybe_symmetric(x::SparseArrays.AbstractSparseMatrix) = x +@inline __maybe_symmetric(x::SciMLOperators.AbstractSciMLOperator) = x + +# Unalias +@inline __maybe_unaliased(x::Union{Number, SArray}, ::Bool) = x +@inline function __maybe_unaliased(x::AbstractArray, alias::Bool) + # Spend time coping iff we will mutate the array + (alias || !can_setindex(typeof(x))) && return x + return deepcopy(x) +end + +# Init ones +@inline function __init_ones(x) + w = similar(x) + recursivefill!(w, true) + return w +end +@inline __init_ones(x::StaticArray) = ones(typeof(x)) From 9bc8f5bacb706ee0c3ef9382e2270b1ec5a791db Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 29 Nov 2023 21:45:54 -0500 Subject: [PATCH 02/25] Reuse more code in Broyden --- src/NonlinearSolve.jl | 2 +- src/broyden.jl | 95 ++++++++++++++----------------------------- src/raphson.jl | 2 +- src/utils.jl | 38 ++++++++++++----- 4 files changed, 60 insertions(+), 77 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index f050bf007..f1782b8c1 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -25,7 +25,7 @@ import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_work AbstractVectorOfArray, recursivecopy!, recursivefill! 
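# NOTE (illustrative sketch, not part of this patch): the `FakeLinearSolveJLCache` /
# `dolinsolve` fast path added to src/utils.jl above bypasses LinearSolve.jl for
# scalar and static problems and simply computes `A \ b`, because, as the comment in
# `linsolve_caches` notes, the default LinearSolve.jl handling of SArrays allocates
# unnecessarily. The primitive it leans on is just the static backslash solve:

using StaticArrays

A = @SMatrix [4.0 1.0; 1.0 3.0]
b = @SVector [1.0, 2.0]
x = A \ b   # fully static solve, no factorization cache required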
import SciMLBase: AbstractNonlinearAlgorithm, NLStats, _unwrap_val, has_jac, isinplace import SciMLOperators: FunctionOperator - import StaticArraysCore: StaticArray, SVector, SArray, MArray, Size, SMatrix + import StaticArraysCore: StaticArray, SVector, SArray, MArray, Size, SMatrix, MMatrix import UnPack: @unpack using ADTypes, LineSearches, SciMLBase, SimpleNonlinearSolve diff --git a/src/broyden.jl b/src/broyden.jl index 008ff589d..e0b69f19c 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -65,81 +65,46 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde termination_condition = nothing, internalnorm::F = DEFAULT_NORM, kwargs...) where {uType, iip, F} @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) + u = __maybe_unaliased(u0, alias_u0) fu = evaluate_f(prob, u) - du = _mutable_zero(u) + @bb du = copy(u) J⁻¹ = __init_identity_jacobian(u, fu) reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(real(eltype(u)))) : alg.reset_tolerance reset_check = x -> abs(x) ≤ reset_tolerance + @bb u_prev = copy(u) + @bb fu2 = copy(fu) + @bb dfu = similar(fu) + @bb J⁻¹₂ = similar(u) + @bb J⁻¹df = similar(u) + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, J⁻¹, du; uses_jac_inverse = Val(true), kwargs...) - return GeneralBroydenCache{iip}(f, alg, u, zero(u), du, fu, zero(fu), - zero(fu), p, J⁻¹, zero(_reshape(fu, 1, :)), _mutable_zero(u), false, 0, - alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, - reset_tolerance, reset_check, prob, NLStats(1, 0, 0, 0, 0), + return GeneralBroydenCache{iip}(f, alg, u, u_prev, du, fu, fu2, dfu, p, J⁻¹, + J⁻¹₂, J⁻¹df, false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, + abstol, reltol, reset_tolerance, reset_check, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end -function perform_step!(cache::GeneralBroydenCache{true}) - @unpack f, p, du, fu, fu2, dfu, u, u_prev, J⁻¹, J⁻¹df, J⁻¹₂ = cache - T = eltype(u) - - mul!(_vec(du), J⁻¹, _vec(fu)) - α = perform_linesearch!(cache.ls_cache, u, du) - _axpy!(-α, du, u) - f(fu2, u, p) - - update_trace_with_invJ!(cache.trace, cache.stats.nsteps + 1, get_u(cache), - get_fu(cache), J⁻¹, du, α) - - check_and_update!(cache, fu2, u, u_prev) - cache.stats.nf += 1 - - cache.force_stop && return nothing +function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} + T = eltype(cache.u) - # Update the inverse jacobian - dfu .= fu2 .- fu + @bb cache.du = cache.J⁻¹ × vec(cache.fu) + α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) + @bb axpy!(-α, cache.du, cache.u) - if all(cache.reset_check, du) || all(cache.reset_check, dfu) - if cache.resets ≥ cache.max_resets - cache.retcode = ReturnCode.ConvergenceFailure - cache.force_stop = true - return nothing - end - fill!(J⁻¹, 0) - J⁻¹[diagind(J⁻¹)] .= T(1) - cache.resets += 1 + if iip + cache.f(cache.fu2, cache.u, cache.p) else - du .*= -1 - mul!(_vec(J⁻¹df), J⁻¹, _vec(dfu)) - mul!(J⁻¹₂, _vec(du)', J⁻¹) - denom = dot(du, J⁻¹df) - du .= (du .- J⁻¹df) ./ ifelse(iszero(denom), T(1e-5), denom) - mul!(J⁻¹, _vec(du), J⁻¹₂, 1, 1) + cache.fu2 = cache.f(cache.u, cache.p) end - fu .= fu2 - @. 
u_prev = u - - return nothing -end - -function perform_step!(cache::GeneralBroydenCache{false}) - @unpack f, p = cache - - T = eltype(cache.u) - - cache.du = _restructure(cache.du, cache.J⁻¹ * _vec(cache.fu)) - α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) - cache.u = cache.u .- α * cache.du - cache.fu2 = f(cache.u, p) update_trace_with_invJ!(cache.trace, cache.stats.nsteps + 1, get_u(cache), - get_fu(cache), cache.J⁻¹, cache.du, α) + cache.fu2, cache.J⁻¹, cache.du, α) check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) cache.stats.nf += 1 @@ -147,25 +112,27 @@ function perform_step!(cache::GeneralBroydenCache{false}) cache.force_stop && return nothing # Update the inverse jacobian - cache.dfu = cache.fu2 .- cache.fu + @bb @. cache.dfu = cache.fu2 - cache.fu + if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu) if cache.resets ≥ cache.max_resets cache.retcode = ReturnCode.ConvergenceFailure cache.force_stop = true return nothing end - cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu) + cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹) cache.resets += 1 else - cache.du = -cache.du - cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu)) - cache.J⁻¹₂ = _vec(cache.du)' * cache.J⁻¹ + @bb cache.du .*= -1 + @bb cache.J⁻¹df = cache.J⁻¹ × vec(cache.dfu) + @bb cache.J⁻¹₂ = cache.J⁻¹ × vec(cache.du) denom = dot(cache.du, cache.J⁻¹df) - cache.du = (cache.du .- cache.J⁻¹df) ./ ifelse(iszero(denom), T(1e-5), denom) - cache.J⁻¹ = cache.J⁻¹ .+ _vec(cache.du) * cache.J⁻¹₂ + @bb @. cache.du = (cache.du - cache.J⁻¹df) / ifelse(iszero(denom), T(1e-5), denom) + @bb cache.J⁻¹ += vec(cache.du) × transpose(cache.J⁻¹₂) end - cache.fu = cache.fu2 - cache.u_prev = @. cache.u + + @bb copyto!(cache.fu, cache.fu2) + @bb copyto!(cache.u_prev, cache.u) return nothing end diff --git a/src/raphson.jl b/src/raphson.jl index 4c4125579..52e47ac01 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -114,7 +114,7 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) @bb axpy!(-α, cache.du, cache.u) - evaluate_f(cache, cache.u) + evaluate_f(cache, cache.u, cache.p) update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, cache.du, α) diff --git a/src/utils.jl b/src/utils.jl index d3017d42f..ab2db093f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -188,12 +188,11 @@ function evaluate_f(prob::Union{NonlinearProblem{uType, iip}, return fu end -function evaluate_f(cache, u) - @unpack f, p = cache.prob +function evaluate_f(cache, u, p) if isinplace(cache) - f(get_fu(cache), u, p) + cache.prob.f(get_fu(cache), u, p) else - set_fu!(cache, f(u, p)) + set_fu!(cache, cache.prob.f(u, p)) end return nothing end @@ -301,14 +300,31 @@ function check_and_update!(tc_cache, cache, fu, u, uprev, end end -__init_identity_jacobian(u::Number, _) = u -function __init_identity_jacobian(u, fu) - return convert(parameterless_type(_mutable(u)), - Matrix{eltype(u)}(I, length(fu), length(u))) +@inline __init_identity_jacobian(u::Number, _) = one(u) +@inline function __init_identity_jacobian(u, fu) + J = similar(fu, promote_type(eltype(fu), eltype(u)), length(fu), length(u)) + fill!(J, zero(eltype(J))) + J[diagind(J)] .= one(eltype(J)) + return J end -function __init_identity_jacobian(u::StaticArray, fu) - return convert(MArray{Tuple{length(fu), length(u)}}, - Matrix{eltype(u)}(I, length(fu), length(u))) +@inline function __init_identity_jacobian(u::StaticArray, fu::StaticArray) + T = 
promote_type(eltype(fu), eltype(u)) + return MArray{Tuple{prod(Size(fu)), prod(Size(u))}, T}(I) +end +@inline function __init_identity_jacobian(u::SArray, fu::SArray) + T = promote_type(eltype(fu), eltype(u)) + return SArray{Tuple{prod(Size(fu)), prod(Size(u))}, T}(I) +end + +@inline __reinit_identity_jacobian!!(J::Number) = one(J) +@inline function __reinit_identity_jacobian!!(J::AbstractMatrix) + fill!(J, zero(eltype(J))) + J[diagind(J)] .= one(eltype(J)) + return J +end +@inline function __reinit_identity_jacobian!!(J::SMatrix) + S = Size(J) + return SArray{Tuple{S[1], S[2]}, eltype(J)}(I) end function __init_low_rank_jacobian(u::StaticArray, fu, threshold::Int) From a5c6195c4b8952078542082ecc22d3328a81b18f Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 29 Nov 2023 22:01:55 -0500 Subject: [PATCH 03/25] Share reinit code --- src/NonlinearSolve.jl | 36 +++++++++++++++++++++++++++++++++++- src/broyden.jl | 29 +++-------------------------- src/raphson.jl | 28 ---------------------------- 3 files changed, 38 insertions(+), 55 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index f1782b8c1..9096525ee 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -8,7 +8,8 @@ import Reexport: @reexport import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_workload @recompile_invalidations begin - using DiffEqBase, LazyArrays, LinearAlgebra, LinearSolve, Printf, SparseArrays, + using DiffEqBase, + LazyArrays, LinearAlgebra, LinearSolve, Printf, SparseArrays, SparseDiffTools import ADTypes: AbstractFiniteDifferencesMode @@ -51,6 +52,39 @@ abstract type AbstractNonlinearSolveCache{iip} end isinplace(::AbstractNonlinearSolveCache{iip}) where {iip} = iip +function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(cache); + p = cache.p, abstol = cache.abstol, reltol = cache.reltol, + maxiters = cache.maxiters, alias_u0 = false, + termination_condition = get_termination_mode(cache.tc_cache)) where {iip} + cache.p = p + if iip + recursivecopy!(get_u(cache), u0) + cache.f(cache.fu1, get_u(cache), p) + else + cache.u = __maybe_unaliased(u0, alias_u0) + set_fu!(cache, cache.f(cache.u, p)) + end + + reset!(cache.trace) + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, get_fu(cache), + get_u(cache), termination_condition) + + cache.abstol = abstol + cache.reltol = reltol + cache.tc_cache = tc_cache + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.force_stop = false + cache.retcode = ReturnCode.Default + + __reinit_internal!(cache) + + return cache +end + +__reinit_internal!(cache::AbstractNonlinearSolveCache) = nothing + function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm) str = "$(nameof(typeof(alg)))(" modifiers = String[] diff --git a/src/broyden.jl b/src/broyden.jl index e0b69f19c..dbc4f5131 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -137,31 +137,8 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} return nothing end -function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu = cache.f(cache.u, p) - end - - reset!(cache.trace) - abstol, reltol, tc_cache = 
init_termination_cache(abstol, reltol, cache.fu, cache.u, - termination_condition) - - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 +function __reinit_internal!(cache::GeneralBroydenCache) + cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹) cache.resets = 0 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache + return nothing end diff --git a/src/raphson.jl b/src/raphson.jl index 52e47ac01..07b155f1c 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -128,31 +128,3 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} cache.stats.nfactors += 1 return nothing end - -function SciMLBase.reinit!(cache::NewtonRaphsonCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu1, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu1 = cache.f(cache.u, p) - end - - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu1, cache.u, - termination_condition) - - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache -end From f147663015cef79f1ed75171e6d1014b2941e755 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 01:14:46 -0500 Subject: [PATCH 04/25] Reuse Klement Code --- Project.toml | 4 +- src/NonlinearSolve.jl | 9 +-- src/klement.jl | 173 +++++++++++++----------------------------- src/utils.jl | 10 +-- 4 files changed, 60 insertions(+), 136 deletions(-) diff --git a/Project.toml b/Project.toml index 8a42f9d21..9385b14a2 100644 --- a/Project.toml +++ b/Project.toml @@ -26,7 +26,7 @@ SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" [weakdeps] @@ -75,7 +75,6 @@ SimpleNonlinearSolve = "1" # FIXME: Don't update the version in this PR. 
Using SparseArrays = "<0.0.1, 1" SparseDiffTools = "2.14" StaticArrays = "1" -StaticArraysCore = "1.4" Symbolics = "5" Test = "1" UnPack = "1.0" @@ -99,7 +98,6 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 9096525ee..63987898b 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -8,9 +8,8 @@ import Reexport: @reexport import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_workload @recompile_invalidations begin - using DiffEqBase, - LazyArrays, LinearAlgebra, LinearSolve, Printf, SparseArrays, - SparseDiffTools + using ADTypes, DiffEqBase, LazyArrays, LineSearches, LinearAlgebra, LinearSolve, Printf, + SciMLBase, SimpleNonlinearSolve, SparseArrays, SparseDiffTools, StaticArrays import ADTypes: AbstractFiniteDifferencesMode import ArrayInterface: undefmatrix, restructure, can_setindex, @@ -26,10 +25,8 @@ import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_work AbstractVectorOfArray, recursivecopy!, recursivefill! import SciMLBase: AbstractNonlinearAlgorithm, NLStats, _unwrap_val, has_jac, isinplace import SciMLOperators: FunctionOperator - import StaticArraysCore: StaticArray, SVector, SArray, MArray, Size, SMatrix, MMatrix + import StaticArrays: StaticArray, SVector, SArray, MArray, Size, SMatrix, MMatrix import UnPack: @unpack - - using ADTypes, LineSearches, SciMLBase, SimpleNonlinearSolve end @reexport using ADTypes, LineSearches, SciMLBase, SimpleNonlinearSolve diff --git a/src/klement.jl b/src/klement.jl index 8a9640fd4..4296defcf 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -74,38 +74,43 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme termination_condition = nothing, internalnorm::F = DEFAULT_NORM, linsolve_kwargs = (;), kwargs...) where {uType, iip, F} @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) + u = __maybe_unaliased(u0, alias_u0) fu = evaluate_f(prob, u) J = __init_identity_jacobian(u, fu) - du = _mutable_zero(u) + @bb du = similar(u) if u isa Number - linsolve = nothing + linsolve = FakeLinearSolveJLCache(J, fu) alg = alg_ else # For General Julia Arrays default to LU Factorization - linsolve_alg = alg_.linsolve === nothing && u isa Array ? LUFactorization() : - nothing + linsolve_alg = (alg_.linsolve === nothing && (u isa Array || u isa StaticArray)) ? + LUFactorization() : nothing alg = set_linsolve(alg_, linsolve_alg) - linsolve = linsolve_caches(J, _vec(fu), _vec(du), p, alg) + linsolve = linsolve_caches(J, _vec(fu), _vec(du), p, alg; linsolve_kwargs) end abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, J, du; kwargs...) 
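# NOTE (illustrative sketch, not part of this patch): for orientation, a minimal
# end-user call that exercises this `__init`/`perform_step!` pair. The problem mirrors
# the (temporarily commented-out) precompile workload from the first commit, so only
# the local names are new; convergence to sqrt(2) is the expected outcome, not a
# guarantee of this intermediate commit:

using NonlinearSolve

f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(f, [0.1], 2.0)
sol = solve(prob, GeneralKlement(); abstol = 1e-9)
# sol.u[1] ≈ sqrt(2)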
- return GeneralKlementCache{iip}(f, alg, u, zero(u), fu, zero(fu), du, p, linsolve, - J, zero(J), zero(J), _vec(zero(fu)), _vec(zero(fu)), 0, false, - maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, - NLStats(1, 0, 0, 0, 0), + @bb u_prev = copy(u) + @bb fu2 = similar(fu) + @bb J_cache = similar(J) + @bb J_cache2 = similar(J) + @bb Jᵀ²du = similar(fu) + @bb Jdu = similar(fu) + + return GeneralKlementCache{iip}(f, alg, u, u_prev, fu, fu2, du, p, linsolve, J, J_cache, + J_cache2, Jᵀ²du, Jdu, 0, false, maxiters, internalnorm, ReturnCode.Default, abstol, + reltol, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end -function perform_step!(cache::GeneralKlementCache{true}) - @unpack u, u_prev, fu, f, p, alg, J, linsolve, du = cache - T = eltype(J) - - singular, fact_done = _try_factorize_and_check_singular!(linsolve, J) +function perform_step!(cache::GeneralKlementCache{iip}) where {iip} + @unpack linsolve, alg = cache + T = eltype(cache.J) + singular, fact_done = __try_factorize_and_check_singular!(linsolve, cache.J) if singular if cache.resets == alg.max_resets @@ -114,88 +119,33 @@ function perform_step!(cache::GeneralKlementCache{true}) return nothing end fact_done = false - fill!(J, zero(T)) - J[diagind(J)] .= T(1) + cache.J = __reinit_identity_jacobian!!(cache.J) cache.resets += 1 end # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J), - b = _vec(fu), linu = _vec(du), p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), + linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - # Line Search - α = perform_linesearch!(cache.ls_cache, u, du) - _axpy!(-α, du, u) - f(cache.fu2, u, p) - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, J, - cache.du, α) - - check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) - cache.stats.nf += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - - cache.force_stop && return nothing - - # Update the Jacobian - cache.du .*= -1 - cache.J_cache .= cache.J' .^ 2 - cache.Jdu .= _vec(du) .^ 2 - mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu) - mul!(cache.Jdu, J, _vec(du)) - cache.fu .= cache.fu2 .- cache.fu - cache.fu .= _restructure(cache.fu, - (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(real(T)))) - mul!(cache.J_cache, _vec(cache.fu), _vec(du)') - cache.J_cache .*= J - mul!(cache.J_cache2, cache.J_cache, J) - J .+= cache.J_cache2 - - @. 
u_prev = u - cache.fu .= cache.fu2 - - return nothing -end - -function perform_step!(cache::GeneralKlementCache{false}) - @unpack fu, f, p, alg, J, linsolve = cache + !iip && (cache.du = linres.u) - T = eltype(J) - - singular, fact_done = _try_factorize_and_check_singular!(linsolve, J) - - if singular - if cache.resets == alg.max_resets - cache.force_stop = true - cache.retcode = ReturnCode.ConvergenceFailure - return nothing - end - fact_done = false - cache.J = __init_identity_jacobian(cache.u, fu) - cache.resets += 1 - end + # Line Search + α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) + @bb axpy!(-α, cache.du, cache.u) - # u = u - J \ fu - if linsolve === nothing - cache.du = fu / cache.J + if iip + cache.f(cache.fu2, cache.u, cache.p) else - linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J), - b = _vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol) - cache.linsolve = linres.cache + cache.fu2 = cache.f(cache.u, cache.p) end - # Line Search - α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) - cache.u = @. cache.u - α * cache.du # `u` might not support mutation - cache.fu2 = f(cache.u, p) - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, J, + update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, cache.J, cache.du, α) check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) - cache.u_prev = cache.u + @bb copyto!(cache.u_prev, cache.u) + cache.stats.nf += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 @@ -203,46 +153,27 @@ function perform_step!(cache::GeneralKlementCache{false}) cache.force_stop && return nothing # Update the Jacobian - cache.du = -cache.du - cache.J_cache = cache.J' .^ 2 - cache.Jdu = _vec(cache.du) .^ 2 - cache.Jᵀ²du = cache.J_cache * cache.Jdu - cache.Jdu = J * _vec(cache.du) - cache.fu = cache.fu2 .- cache.fu - cache.fu = _restructure(cache.fu, - (_vec(cache.fu) .- cache.Jdu) ./ max.(cache.Jᵀ²du, eps(real(T)))) - cache.J_cache = ((_vec(cache.fu) * _vec(cache.du)') .* J) * J - cache.J = J .+ cache.J_cache - - cache.fu = cache.fu2 + @bb cache.du .*= -1 + @bb cache.J_cache .= cache.J' .^ 2 + @bb @. cache.Jdu = cache.du ^ 2 + @bb cache.Jᵀ²du = cache.J_cache × vec(cache.Jdu) + @bb cache.Jdu = cache.J × vec(cache.du) + @bb @. cache.fu = cache.fu2 - cache.fu - return nothing -end + @bb @. cache.fu = (cache.fu - cache.Jdu) / max(cache.Jᵀ²du, eps(real(T))) -function SciMLBase.reinit!(cache::GeneralKlementCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu = cache.f(cache.u, p) - end + @bb cache.J_cache = vec(cache.fu) × transpose(_vec(cache.du)) + @bb @. 
cache.J_cache *= cache.J + @bb cache.J_cache2 = cache.J_cache × cache.J + @bb cache.J .+= cache.J_cache2 - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu, cache.u, - termination_condition) + @bb copyto!(cache.fu, cache.fu2) - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache + return nothing +end + +function __reinit_internal!(cache::GeneralKlementCache) + cache.J = __reinit_identity_jacobian!!(cache.J) + cache.resets = 0 + return nothing end diff --git a/src/utils.jl b/src/utils.jl index ab2db093f..bc38d9257 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -356,8 +356,8 @@ end # If factorization is LU then perform that and update the linsolve cache # else check if the matrix is singular -function _try_factorize_and_check_singular!(linsolve, X) - if linsolve.cacheval isa LU +function __try_factorize_and_check_singular!(linsolve, X) + if linsolve.cacheval isa LU || linsolve.cacheval isa StaticArrays.LU # LU Factorization was used linsolve.A = X linsolve.cacheval = LinearSolve.do_factorization(linsolve.alg, X, linsolve.b, @@ -368,11 +368,9 @@ function _try_factorize_and_check_singular!(linsolve, X) end return _issingular(X), false end -_try_factorize_and_check_singular!(::Nothing, x) = _issingular(x), false - -@inline _reshape(x, args...) = reshape(x, args...) -@inline _reshape(x::Number, args...) = x +__try_factorize_and_check_singular!(::FakeLinearSolveJLCache, x) = _issingular(x), false +# TODO: Remove. handled in MaybeInplace.jl @generated function _axpy!(α, x, y) hasmethod(axpy!, Tuple{α, x, y}) && return :(axpy!(α, x, y)) return :(@. 
y += α * x) From 4f2dec04fa30432b19062f4726cb8cc487ef02ab Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 15:35:07 -0500 Subject: [PATCH 05/25] Make the internal field names more consistent --- src/NonlinearSolve.jl | 21 +++++++++-------- src/broyden.jl | 49 +++++++++++++++----------------------- src/jacobian.jl | 11 +++++---- src/klement.jl | 55 +++++++++++++++++-------------------------- src/raphson.jl | 31 +++++++++++------------- src/trace.jl | 20 ++++++++++++++++ src/utils.jl | 16 +++++++------ 7 files changed, 101 insertions(+), 102 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 63987898b..d55527c90 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -117,8 +117,9 @@ end function not_terminated(cache::AbstractNonlinearSolveCache) return !cache.force_stop && cache.stats.nsteps < cache.maxiters end -get_fu(cache::AbstractNonlinearSolveCache) = cache.fu1 -set_fu!(cache::AbstractNonlinearSolveCache, fu) = (cache.fu1 = fu) + +get_fu(cache::AbstractNonlinearSolveCache) = cache.fu +set_fu!(cache::AbstractNonlinearSolveCache, fu) = (cache.fu = fu) get_u(cache::AbstractNonlinearSolveCache) = cache.u SciMLBase.set_u!(cache::AbstractNonlinearSolveCache, u) = (cache.u = u) @@ -152,17 +153,17 @@ include("trace.jl") include("extension_algs.jl") include("linesearch.jl") include("raphson.jl") -include("trustRegion.jl") -include("levenberg.jl") -include("gaussnewton.jl") -include("dfsane.jl") -include("pseudotransient.jl") +# include("trustRegion.jl") +# include("levenberg.jl") +# include("gaussnewton.jl") +# include("dfsane.jl") +# include("pseudotransient.jl") include("broyden.jl") include("klement.jl") -include("lbroyden.jl") +# include("lbroyden.jl") include("jacobian.jl") -include("ad.jl") -include("default.jl") +# include("ad.jl") +# include("default.jl") # @setup_workload begin # nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), diff --git a/src/broyden.jl b/src/broyden.jl index dbc4f5131..d1c8ac433 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -31,15 +31,14 @@ end f alg u - u_prev + u_cache du fu - fu2 + fu_cache dfu p J⁻¹ - J⁻¹₂ - J⁻¹df + J⁻¹dfu force_stop::Bool resets::Int max_resets::Int @@ -57,9 +56,6 @@ end trace end -get_fu(cache::GeneralBroydenCache) = cache.fu -set_fu!(cache::GeneralBroydenCache, fu) = (cache.fu = fu) - function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyden, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, @@ -73,19 +69,18 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde alg.reset_tolerance reset_check = x -> abs(x) ≤ reset_tolerance - @bb u_prev = copy(u) - @bb fu2 = copy(fu) + @bb u_cache = copy(u) + @bb fu_cache = similar(fu) @bb dfu = similar(fu) - @bb J⁻¹₂ = similar(u) - @bb J⁻¹df = similar(u) + @bb J⁻¹dfu = similar(u) abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, J⁻¹, du; uses_jac_inverse = Val(true), kwargs...) 
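# NOTE (illustrative sketch, not part of this patch): for reference, the classical
# "good Broyden" rank-one update of the inverse Jacobian that the `@bb` kernel in
# `perform_step!` below is an allocation-aware variant of. Dense and out-of-place,
# with illustrative names (Δu = change in iterate, Δf = change in residual); the
# `1e-5` guard against a vanishing denominator mirrors the one used in the patch:

using LinearAlgebra

function broyden_inverse_update(J⁻¹, Δu, Δf; tol = 1e-5)
    v = J⁻¹ * Δf                                    # J⁻¹ Δf
    denom = dot(Δu, v)                              # Δuᵀ J⁻¹ Δf
    denom = ifelse(iszero(denom), oftype(denom, tol), denom)
    return J⁻¹ + ((Δu - v) / denom) * (Δu' * J⁻¹)   # rank-one correction
end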
- return GeneralBroydenCache{iip}(f, alg, u, u_prev, du, fu, fu2, dfu, p, J⁻¹, - J⁻¹₂, J⁻¹df, false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, + return GeneralBroydenCache{iip}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p, + J⁻¹, J⁻¹dfu, false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end @@ -97,22 +92,16 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) @bb axpy!(-α, cache.du, cache.u) - if iip - cache.f(cache.fu2, cache.u, cache.p) - else - cache.fu2 = cache.f(cache.u, cache.p) - end - - update_trace_with_invJ!(cache.trace, cache.stats.nsteps + 1, get_u(cache), - cache.fu2, cache.J⁻¹, cache.du, α) + evaluate_f(cache, cache.u, cache.p) - check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) + update_trace!(cache, α) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) cache.stats.nf += 1 cache.force_stop && return nothing # Update the inverse jacobian - @bb @. cache.dfu = cache.fu2 - cache.fu + @bb @. cache.dfu = cache.fu - cache.fu_cache if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu) if cache.resets ≥ cache.max_resets @@ -124,15 +113,15 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} cache.resets += 1 else @bb cache.du .*= -1 - @bb cache.J⁻¹df = cache.J⁻¹ × vec(cache.dfu) - @bb cache.J⁻¹₂ = cache.J⁻¹ × vec(cache.du) - denom = dot(cache.du, cache.J⁻¹df) - @bb @. cache.du = (cache.du - cache.J⁻¹df) / ifelse(iszero(denom), T(1e-5), denom) - @bb cache.J⁻¹ += vec(cache.du) × transpose(cache.J⁻¹₂) + @bb cache.J⁻¹dfu = cache.J⁻¹ × vec(cache.dfu) + @bb cache.u_cache = cache.J⁻¹ × vec(cache.du) + denom = dot(cache.du, cache.J⁻¹dfu) + @bb @. cache.du = (cache.du - cache.J⁻¹dfu) / ifelse(iszero(denom), T(1e-5), denom) + @bb cache.J⁻¹ += vec(cache.du) × transpose(cache.u_cache) end - @bb copyto!(cache.fu, cache.fu2) - @bb copyto!(cache.u_prev, cache.u) + @bb copyto!(cache.fu_cache, cache.fu) + @bb copyto!(cache.u_cache, cache.u) return nothing end diff --git a/src/jacobian.jl b/src/jacobian.jl index 54f1c0f0e..a63a57ffc 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -33,13 +33,13 @@ jacobian!!(J, _) = J # `!!` notation is from BangBang.jl since J might be jacobian in case of oop `f.jac` # and we don't want wasteful `copyto!` function jacobian!!(J::Union{AbstractMatrix{<:Number}, Nothing}, cache) - @unpack f, uf, u, p, jac_cache, alg, fu2 = cache + @unpack f, uf, u, p, jac_cache, alg, fu_cache = cache iip = isinplace(cache) if iip if has_jac(f) f.jac(J, u, p) else - sparse_jacobian!(J, alg.ad, jac_cache, uf, fu2, u) + sparse_jacobian!(J, alg.ad, jac_cache, uf, fu_cache, u) end return J else @@ -116,9 +116,10 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val end if linsolve_init - linprob_A = alg isa PseudoTransient ? - (J - (1 / (convert(eltype(u), alg.alpha_initial))) * I) : - (needsJᵀJ ? __maybe_symmetric(JᵀJ) : J) + linprob_A = needsJᵀJ ? __maybe_symmetric(JᵀJ) : J + # linprob_A = alg isa PseudoTransient ? + # (J - (1 / (convert(eltype(u), alg.alpha_initial))) * I) : + # (needsJᵀJ ? __maybe_symmetric(JᵀJ) : J) linsolve = linsolve_caches(linprob_A, needsJᵀJ ? 
Jᵀfu : fu, du, p, alg; linsolve_kwargs) else diff --git a/src/klement.jl b/src/klement.jl index 4296defcf..4ec612273 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -41,17 +41,17 @@ end f alg u - u_prev + u_cache fu - fu2 + fu_cache du p linsolve J J_cache - J_cache2 - Jᵀ²du + J_cache_2 Jdu + Jdu_cache resets force_stop maxiters::Int @@ -66,9 +66,6 @@ end trace end -get_fu(cache::GeneralKlementCache) = cache.fu -set_fu!(cache::GeneralKlementCache, fu) = (cache.fu = fu) - function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKlement, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, @@ -94,16 +91,16 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, J, du; kwargs...) - @bb u_prev = copy(u) - @bb fu2 = similar(fu) + @bb u_cache = similar(u) + @bb fu_cache = similar(fu) @bb J_cache = similar(J) - @bb J_cache2 = similar(J) - @bb Jᵀ²du = similar(fu) + @bb J_cache_2 = similar(J) @bb Jdu = similar(fu) + @bb Jdu_cache = similar(fu) - return GeneralKlementCache{iip}(f, alg, u, u_prev, fu, fu2, du, p, linsolve, J, J_cache, - J_cache2, Jᵀ²du, Jdu, 0, false, maxiters, internalnorm, ReturnCode.Default, abstol, - reltol, prob, NLStats(1, 0, 0, 0, 0), + return GeneralKlementCache{iip}(f, alg, u, u_cache, fu, fu_cache, du, p, linsolve, + J, J_cache, J_cache_2, Jdu, Jdu_cache, 0, false, maxiters, internalnorm, + ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end @@ -127,24 +124,18 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - !iip && (cache.du = linres.u) # Line Search α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) @bb axpy!(-α, cache.du, cache.u) - if iip - cache.f(cache.fu2, cache.u, cache.p) - else - cache.fu2 = cache.f(cache.u, cache.p) - end + evaluate_f(cache, cache.u, cache.p) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, cache.J, - cache.du, α) + update_trace!(cache, α) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) - check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) - @bb copyto!(cache.u_prev, cache.u) + @bb copyto!(cache.u_cache, cache.u) cache.stats.nf += 1 cache.stats.nsolve += 1 @@ -155,19 +146,17 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} # Update the Jacobian @bb cache.du .*= -1 @bb cache.J_cache .= cache.J' .^ 2 - @bb @. cache.Jdu = cache.du ^ 2 - @bb cache.Jᵀ²du = cache.J_cache × vec(cache.Jdu) + @bb @. cache.Jdu = cache.du^2 + @bb cache.Jdu_cache = cache.J_cache × vec(cache.Jdu) @bb cache.Jdu = cache.J × vec(cache.du) - @bb @. cache.fu = cache.fu2 - cache.fu - - @bb @. cache.fu = (cache.fu - cache.Jdu) / max(cache.Jᵀ²du, eps(real(T))) - + @bb @. cache.fu_cache = (cache.fu - cache.fu_cache - cache.Jdu) / + max(cache.Jdu_cache, eps(real(T))) @bb cache.J_cache = vec(cache.fu) × transpose(_vec(cache.du)) @bb @. 
cache.J_cache *= cache.J - @bb cache.J_cache2 = cache.J_cache × cache.J - @bb cache.J .+= cache.J_cache2 + @bb cache.J_cache_2 = cache.J_cache × cache.J + @bb cache.J .+= cache.J_cache_2 - @bb copyto!(cache.fu, cache.fu2) + @bb copyto!(cache.fu_cache, cache.fu) return nothing end diff --git a/src/raphson.jl b/src/raphson.jl index 07b155f1c..835fadd48 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -52,9 +52,9 @@ end f alg u - u_prev - fu1 - fu2 + fu + u_cache + fu_cache du p uf @@ -81,19 +81,19 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob u = __maybe_unaliased(u0, alias_u0) - fu1 = evaluate_f(prob, u) - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); + fu = evaluate_f(prob, u) + uf, linsolve, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu1, u, + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) - ls_cache = init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)) - trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) + ls_cache = init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)) + trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) - @bb u_prev = copy(u) + @bb u_cache = copy(u) - return NewtonRaphsonCache{iip}(f, alg, u, u_prev, fu1, fu2, du, p, uf, linsolve, J, + return NewtonRaphsonCache{iip}(f, alg, u, fu, u_cache, fu_cache, du, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), ls_cache, tc_cache, trace) end @@ -104,10 +104,9 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} cache.J = jacobian!!(cache.J, cache) # u = u - J \ fu - linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu1), + linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - !iip && (cache.du = linres.u) # Line Search @@ -116,12 +115,10 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} evaluate_f(cache, cache.u, cache.p) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - cache.du, α) - - check_and_update!(cache, cache.fu1, cache.u, cache.u_prev) + update_trace!(cache, α) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) - @bb copyto!(cache.u_prev, cache.u) + @bb copyto!(cache.u_cache, cache.u) cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 diff --git a/src/trace.jl b/src/trace.jl index e89efe956..39c01d2c7 100644 --- a/src/trace.jl +++ b/src/trace.jl @@ -240,3 +240,23 @@ function update_trace_with_invJ!(trace::NonlinearSolveTrace{ShT, StT}, iter, u, show_now && show(entry) return trace end + +function update_trace!(cache::AbstractNonlinearSolveCache, α = true) + trace = __getproperty(cache, Val(:trace)) + trace === nothing && return nothing + + J = __getproperty(cache, Val(:J)) + if J === nothing + J_inv = __getproperty(cache, Val(:J⁻¹)) + if J_inv === nothing + update_trace!(trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), + nothing, cache.du, α) + else + update_trace_with_invJ!(trace, cache.stats.nsteps + 1, get_u(cache), + get_fu(cache), J_inv, cache.du, α) + end + else + update_trace!(trace, cache.stats.nsteps + 1, 
get_u(cache), get_fu(cache), J, + cache.du, α) + end +end diff --git a/src/utils.jl b/src/utils.jl index bc38d9257..c6b670f8b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -327,15 +327,17 @@ end return SArray{Tuple{S[1], S[2]}, eltype(J)}(I) end -function __init_low_rank_jacobian(u::StaticArray, fu, threshold::Int) - Vᵀ = convert(MArray{Tuple{length(u), threshold}}, - zeros(eltype(u), length(u), threshold)) - U = convert(MArray{Tuple{threshold, length(u)}}, zeros(eltype(u), threshold, length(u))) +function __init_low_rank_jacobian(u::StaticArray{S1, T1}, fu::StaticArray{S2, T2}, + ::Val{threshold}) where {S1, S2, T1, T2, threshold} + T = promote_type(T1, T2) + fuSize, uSize = Size(fu), Size(u) + Vᵀ = MArray{Tuple{threshold, prod(uSize)}, T}(undef) + U = MArray{Tuple{prod(fuSize), threshold}, T}(undef) return U, Vᵀ end -function __init_low_rank_jacobian(u, fu, threshold::Int) - Vᵀ = convert(parameterless_type(_mutable(u)), zeros(eltype(u), length(u), threshold)) - U = convert(parameterless_type(_mutable(u)), zeros(eltype(u), threshold, length(u))) +function __init_low_rank_jacobian(u, fu, ::Val{threshold}) where {threshold} + Vᵀ = similar(u, threshold, length(u)) + U = similar(u, length(fu), threshold) return U, Vᵀ end From 4c61c4a7ee5b80eed15982aae872bdfca96dad66 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 16:27:18 -0500 Subject: [PATCH 06/25] Fix PT --- src/NonlinearSolve.jl | 6 +- src/jacobian.jl | 9 +-- src/klement.jl | 6 +- src/pseudotransient.jl | 140 +++++++++++++---------------------------- 4 files changed, 56 insertions(+), 105 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index d55527c90..936cd840e 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -20,7 +20,7 @@ import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_work import ForwardDiff import ForwardDiff: Dual import LinearSolve: ComposePreconditioner, InvPreconditioner, needs_concrete_A - import MaybeInplace: @bb + import MaybeInplace: setindex_trait, @bb, CanSetindex, CannotSetindex import RecursiveArrayTools: ArrayPartition, AbstractVectorOfArray, recursivecopy!, recursivefill! import SciMLBase: AbstractNonlinearAlgorithm, NLStats, _unwrap_val, has_jac, isinplace @@ -80,7 +80,7 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c return cache end -__reinit_internal!(cache::AbstractNonlinearSolveCache) = nothing +__reinit_internal!(::AbstractNonlinearSolveCache) = nothing function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm) str = "$(nameof(typeof(alg)))(" @@ -157,7 +157,7 @@ include("raphson.jl") # include("levenberg.jl") # include("gaussnewton.jl") # include("dfsane.jl") -# include("pseudotransient.jl") +include("pseudotransient.jl") include("broyden.jl") include("klement.jl") # include("lbroyden.jl") diff --git a/src/jacobian.jl b/src/jacobian.jl index a63a57ffc..6747da1a8 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -116,10 +116,11 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val end if linsolve_init - linprob_A = needsJᵀJ ? __maybe_symmetric(JᵀJ) : J - # linprob_A = alg isa PseudoTransient ? - # (J - (1 / (convert(eltype(u), alg.alpha_initial))) * I) : - # (needsJᵀJ ? __maybe_symmetric(JᵀJ) : J) + if alg isa PseudoTransient && J isa SciMLOperators.AbstractSciMLOperator + linprob_A = J - inv(convert(eltype(u), alg.alpha_initial)) * I + else + linprob_A = needsJᵀJ ? __maybe_symmetric(JᵀJ) : J + end linsolve = linsolve_caches(linprob_A, needsJᵀJ ? 
Jᵀfu : fu, du, p, alg; linsolve_kwargs) else diff --git a/src/klement.jl b/src/klement.jl index 4ec612273..37d6a6c07 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -120,9 +120,11 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} cache.resets += 1 end + A = ifelse(cache.J isa SMatrix || cache.J isa Number || !fact_done, cache.J, nothing) + # u = u - J \ fu - linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), - linu = _vec(cache.du), cache.p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, cache.linsolve; A, + b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache !iip && (cache.du = linres.u) diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 3873202c4..c6f1926f1 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -41,7 +41,6 @@ SIAM Journal on Scientific Computing,25, 553-569.](https://doi.org/10.1137/S1064 alpha_initial end -#concrete_jac(::PseudoTransient{CJ}) where {CJ} = CJ function set_ad(alg::PseudoTransient{CJ}, ad) where {CJ} return PseudoTransient{CJ}(ad, alg.linsolve, alg.precs, alg.alpha_initial) end @@ -56,9 +55,9 @@ end f alg u - u_prev - fu1 - fu2 + u_cache + fu + fu_cache du p alpha @@ -86,92 +85,66 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::PseudoTransi alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) - fu1 = evaluate_f(prob, u) - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); + u = __maybe_unaliased(u0, alias_u0) + fu = evaluate_f(prob, u) + uf, linsolve, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) alpha = convert(eltype(u), alg.alpha_initial) - res_norm = internalnorm(fu1) + res_norm = internalnorm(fu) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu1, u, + @bb u_cache = copy(u) + + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) - trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) + trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) - return PseudoTransientCache{iip}(f, alg, u, copy(u), fu1, fu2, du, p, alpha, res_norm, - uf, linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, - abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) + return PseudoTransientCache{iip}(f, alg, u, u_cache, fu, fu_cache, du, p, alpha, + res_norm, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, + ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) end -function perform_step!(cache::PseudoTransientCache{true}) - @unpack u, u_prev, fu1, f, p, alg, J, linsolve, du, alpha = cache - jacobian!!(J, cache) +function perform_step!(cache::PseudoTransientCache{iip}) where {iip} + @unpack alg = cache - inv_alpha = inv(alpha) - if J isa SciMLBase.AbstractSciMLOperator - J = J - inv_alpha * I - else - idxs = diagind(J) - if fast_scalar_indexing(J) - @inbounds for i in axes(J, 1) - J[i, i] = J[i, i] - inv_alpha + cache.J = jacobian!!(cache.J, cache) + + inv_α = inv(cache.alpha) + if cache.J isa SciMLOperators.AbstractSciMLOperator + A = cache.J - inv_α * I + elseif setindex_trait(cache.J) === CanSetindex() + idxs = diagind(cache.J) + if fast_scalar_indexing(cache.J) + @inbounds for i in axes(cache.J, 1) + cache.J[i, i] = cache.J[i, i] - inv_α end else - @.. 
broadcast=false @view(J[idxs])=@view(J[idxs]) - inv_alpha + @.. broadcast=false @view(cache.J[idxs])=@view(cache.J[idxs]) - inv_α end + A = cache.J + else + cache.J = cache.J - inv_α * I + A = cache.J end # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve; A = J, b = _vec(fu1), linu = _vec(du), - p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, cache.linsolve; A, b = _vec(cache.fu), + linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - @. u = u - du - f(fu1, u, p) - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), J, - cache.du) - - new_norm = cache.internalnorm(fu1) - cache.alpha *= cache.res_norm / new_norm - cache.res_norm = new_norm - - check_and_update!(cache, cache.fu1, cache.u, cache.u_prev) - - @. u_prev = u - cache.stats.nf += 1 - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end + !iip && (cache.du = linres.u) -function perform_step!(cache::PseudoTransientCache{false}) - @unpack u, u_prev, fu1, f, p, alg, linsolve, alpha = cache + @bb axpy!(-true, cache.du, cache.u) - cache.J = jacobian!!(cache.J, cache) - - inv_alpha = inv(alpha) - cache.J = cache.J - inv_alpha * I - # u = u - J \ fu - if linsolve === nothing - cache.du = fu1 / cache.J - else - linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), - linu = _vec(cache.du), p, reltol = cache.abstol) - cache.linsolve = linres.cache - end - cache.u = @. u - cache.du # `u` might not support mutation - cache.fu1 = f(cache.u, p) + evaluate_f(cache, cache.u, cache.p) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - cache.du) + update_trace!(cache, true) - new_norm = cache.internalnorm(fu1) + new_norm = cache.internalnorm(cache.fu) cache.alpha *= cache.res_norm / new_norm cache.res_norm = new_norm - check_and_update!(cache, cache.fu1, cache.u, cache.u_prev) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) - cache.u_prev = cache.u + @bb copyto!(cache.u_cache, cache.u) cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 @@ -179,33 +152,8 @@ function perform_step!(cache::PseudoTransientCache{false}) return nothing end -function SciMLBase.reinit!(cache::PseudoTransientCache{iip}, u0 = cache.u; p = cache.p, - alpha = cache.alpha, abstol = cache.abstol, reltol = cache.reltol, - termination_condition = get_termination_mode(cache.tc_cache), - maxiters = cache.maxiters) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu1, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu1 = cache.f(cache.u, p) - end - - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu1, cache.u, - termination_condition) - - cache.alpha = convert(eltype(cache.u), alpha) - cache.res_norm = cache.internalnorm(cache.fu1) - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache +function __reinit_internal!(cache::PseudoTransientCache) + cache.alpha = convert(eltype(cache.u), cache.alg.alpha_initial) + cache.res_norm = cache.internalnorm(cache.fu) + return nothing end From c44ba3736e9e783fb80d0bba15e03975a95f900f Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 17:39:28 -0500 Subject: [PATCH 07/25] Cleanup LBroyden --- 
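The limited-memory Broyden hunks below keep the approximate inverse Jacobian in low-rank
form, roughly (-I + U * Vᵀ) with U of size length(fu) × η and Vᵀ of size η × length(u),
so applying it never materializes a dense n × n matrix. A rough out-of-place sketch of
what the in-place `_matvec!!` helper computes (the `lowrank_apply` name is only for this
note):

    lowrank_apply(U, Vᵀ, x) = size(U, 2) == 0 ? -x : U * (Vᵀ * x) .- x
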
src/NonlinearSolve.jl | 2 +- src/broyden.jl | 2 +- src/lbroyden.jl | 267 +++++++++++++++--------------------------- 3 files changed, 96 insertions(+), 175 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 936cd840e..ae1814901 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -160,7 +160,7 @@ include("raphson.jl") include("pseudotransient.jl") include("broyden.jl") include("klement.jl") -# include("lbroyden.jl") +include("lbroyden.jl") include("jacobian.jl") # include("ad.jl") # include("default.jl") diff --git a/src/broyden.jl b/src/broyden.jl index d1c8ac433..504e16912 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -70,7 +70,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde reset_check = x -> abs(x) ≤ reset_tolerance @bb u_cache = copy(u) - @bb fu_cache = similar(fu) + @bb fu_cache = copy(fu) @bb dfu = similar(fu) @bb J⁻¹dfu = similar(u) diff --git a/src/lbroyden.jl b/src/lbroyden.jl index b000325dc..8882a2645 100644 --- a/src/lbroyden.jl +++ b/src/lbroyden.jl @@ -17,34 +17,36 @@ An implementation of `LimitedMemoryBroyden` with resetting and line search. recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch specifically designed for Broyden's method. """ -@concrete struct LimitedMemoryBroyden <: AbstractNewtonAlgorithm{false, Nothing} +@concrete struct LimitedMemoryBroyden{threshold} <: AbstractNewtonAlgorithm{false, Nothing} max_resets::Int - threshold::Int linesearch reset_tolerance end function LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = nothing, - threshold::Int = 27, reset_tolerance = nothing) + threshold::Union{Val, Int} = Val(27), reset_tolerance = nothing) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) - return LimitedMemoryBroyden(max_resets, threshold, linesearch, reset_tolerance) + return LimitedMemoryBroyden{SciMLBase._unwrap_val(threshold)}(max_resets, linesearch, + reset_tolerance) end +__get_threshold(::LimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold) +__get_unwrapped_threshold(::LimitedMemoryBroyden{threshold}) where {threshold} = threshold + @concrete mutable struct LimitedMemoryBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} f alg u - u_prev + u_cache du fu - fu2 + fu_cache dfu p U Vᵀ - Ux - xᵀVᵀ - u_cache + threshold_cache + mat_cache vᵀ_cache force_stop::Bool resets::Int @@ -64,128 +66,74 @@ end trace end -get_fu(cache::LimitedMemoryBroydenCache) = cache.fu -set_fu!(cache::LimitedMemoryBroydenCache, fu) = (cache.fu = fu) - function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemoryBroyden, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, kwargs...) where {uType, iip, F} @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) - if u isa Number - # If u is a number then we simply use Broyden + threshold = __get_threshold(alg) + η = min(__get_unwrapped_threshold(alg), maxiters) + + if u0 isa Number || length(u0) ≤ η + # If u is a number or very small problem then we simply use Broyden return SciMLBase.__init(prob, - GeneralBroyden(; alg.max_resets, alg.reset_tolerance, - alg.linesearch), args...; alias_u0, maxiters, abstol, internalnorm, kwargs...) + GeneralBroyden(; alg.max_resets, alg.reset_tolerance, alg.linesearch), args...; + alias_u0, maxiters, abstol, internalnorm, kwargs...) 
end + u = __maybe_unaliased(u0, alias_u0) + fu = evaluate_f(prob, u) - threshold = min(alg.threshold, maxiters) + U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold) - du = copy(fu) + + @bb du = copy(fu) + @bb u_cache = similar(u) + @bb fu_cache = copy(fu) + @bb dfu = similar(fu) + @bb vᵀ_cache = similar(u) + @bb mat_cache = similar(u) + reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(real(eltype(u)))) : alg.reset_tolerance reset_check = x -> abs(x) ≤ reset_tolerance abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) + U_part = selectdim(U, 1, 1:0) Vᵀ_part = selectdim(Vᵀ, 2, 1:0) trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(*, Vᵀ_part, U_part), du; kwargs...) - return LimitedMemoryBroydenCache{iip}(f, alg, u, zero(u), du, fu, zero(fu), - zero(fu), p, U, Vᵀ, similar(u, threshold), similar(u, 1, threshold), - zero(u), zero(u), false, 0, 0, alg.max_resets, maxiters, internalnorm, - ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check, prob, - NLStats(1, 0, 0, 0, 0), - init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) -end - -function perform_step!(cache::LimitedMemoryBroydenCache{true}) - @unpack f, p, du, u = cache - T = eltype(u) - - α = perform_linesearch!(cache.ls_cache, u, du) - _axpy!(-α, du, u) - f(cache.fu2, u, p) + threshold_cache = __lbroyden_threshold_cache(u, threshold) - idx = min(cache.iterations_since_reset, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, - ApplyArray(*, Vᵀ_part, U_part), du, α) - - check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) - cache.stats.nf += 1 - - cache.force_stop && return nothing - - # Update the Inverse Jacobian Approximation - cache.dfu .= cache.fu2 .- cache.fu - - # Only try to reset if we have enough iterations since last reset - if cache.iterations_since_reset > size(cache.U, 1) && - (all(cache.reset_check, du) || all(cache.reset_check, cache.dfu)) - if cache.resets ≥ cache.max_resets - cache.retcode = ReturnCode.ConvergenceFailure - cache.force_stop = true - return nothing - end - cache.iterations_since_reset = 0 - cache.resets += 1 - cache.du .= cache.fu - else - cache.du .*= -1 - idx = min(cache.iterations_since_reset, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - - __lbroyden_matvec!(_vec(cache.vᵀ_cache), cache.Ux, U_part, Vᵀ_part, _vec(cache.du)) - __lbroyden_rmatvec!(_vec(cache.u_cache), cache.xᵀVᵀ, U_part, Vᵀ_part, - _vec(cache.dfu)) - denom = dot(cache.vᵀ_cache, cache.dfu) - cache.u_cache .= (du .- cache.u_cache) ./ ifelse(iszero(denom), T(1e-5), denom) - - idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) - selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) - selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) - - idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - __lbroyden_matvec!(_vec(cache.du), cache.Ux, U_part, Vᵀ_part, _vec(cache.fu2)) - cache.iterations_since_reset += 1 - end - - cache.u_prev .= cache.u - cache.fu .= cache.fu2 - - return nothing + return LimitedMemoryBroydenCache{iip}(f, alg, u, u_cache, du, fu, fu_cache, dfu, p, + U, Vᵀ, threshold_cache, mat_cache, vᵀ_cache, false, 0, 0, alg.max_resets, maxiters, + internalnorm, ReturnCode.Default, abstol, reltol, reset_tolerance, reset_check, + prob, NLStats(1, 0, 0, 0, 
0), + init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), tc_cache, trace) end -function perform_step!(cache::LimitedMemoryBroydenCache{false}) - @unpack f, p = cache - +function perform_step!(cache::LimitedMemoryBroydenCache{iip}) where {iip} T = eltype(cache.u) α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) - cache.u = cache.u .- α * cache.du - cache.fu2 = f(cache.u, p) + @bb axpy!(-α, cache.du, cache.u) + evaluate_f(cache, cache.u, cache.p) - idx = min(cache.iterations_since_reset, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu2, + idx = min(cache.iterations_since_reset, size(cache.U, 2)) + U_part = selectdim(cache.U, 2, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 1, 1:idx) + update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), cache.fu, ApplyArray(*, Vᵀ_part, U_part), cache.du, α) - check_and_update!(cache, cache.fu2, cache.u, cache.u_prev) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) cache.stats.nf += 1 cache.force_stop && return nothing # Update the Inverse Jacobian Approximation - cache.dfu .= cache.fu2 .- cache.fu + @bb @. cache.dfu = cache.fu - cache.fu_cache # Only try to reset if we have enough iterations since last reset if cache.iterations_since_reset > size(cache.U, 1) && @@ -197,102 +145,75 @@ function perform_step!(cache::LimitedMemoryBroydenCache{false}) end cache.iterations_since_reset = 0 cache.resets += 1 - cache.du = cache.fu + @bb copyto!(cache.du, cache.fu) else - cache.du = -cache.du - idx = min(cache.iterations_since_reset, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - - cache.vᵀ_cache = _restructure(cache.vᵀ_cache, - __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.du))) - cache.u_cache = _restructure(cache.u_cache, - __lbroyden_rmatvec(U_part, Vᵀ_part, _vec(cache.dfu))) + @bb cache.du .*= -1 + + cache.vᵀ_cache = _rmatvec!!(cache.vᵀ_cache, cache.threshold_cache, U_part, Vᵀ_part, + cache.du) + cache.mat_cache = _matvec!!(cache.mat_cache, cache.threshold_cache, U_part, Vᵀ_part, + cache.dfu) + denom = dot(cache.vᵀ_cache, cache.dfu) - cache.u_cache = (cache.du .- cache.u_cache) ./ ifelse(iszero(denom), T(1e-5), denom) + @bb @. cache.u_cache = (cache.du - cache.mat_cache) / + ifelse(iszero(denom), T(1e-5), denom) + + idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 2)) + selectdim(cache.U, 2, idx) .= _vec(cache.u_cache) + selectdim(cache.Vᵀ, 1, idx) .= _vec(cache.vᵀ_cache) - idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) - selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) - selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) + idx = min(cache.iterations_since_reset + 1, size(cache.U, 2)) + U_part = selectdim(cache.U, 2, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 1, 1:idx) + cache.du = _matvec!!(cache.du, cache.threshold_cache, U_part, Vᵀ_part, cache.fu) - idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) - U_part = selectdim(cache.U, 1, 1:idx) - Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) - cache.du = _restructure(cache.du, - __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2))) cache.iterations_since_reset += 1 end - cache.u_prev = @. 
cache.u - cache.fu = cache.fu2 + @bb copyto!(cache.u_cache, cache.u) + @bb copyto!(cache.fu_cache, cache.fu) return nothing end -function SciMLBase.reinit!(cache::LimitedMemoryBroydenCache{iip}, u0 = cache.u; p = cache.p, - termination_condition = get_termination_mode(cache.tc_cache), - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu = cache.f(cache.u, p) - end - - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu, cache.u, - termination_condition) - - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.resets = 0 +function __reinit_internal!(cache::LimitedMemoryBroydenCache) cache.iterations_since_reset = 0 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache + return nothing end -@views function __lbroyden_matvec!(y::AbstractVector, Ux::AbstractVector, - U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) - # Computes Vᵀ × U × x - η = size(U, 1) +function _rmatvec!!(y, xᵀU, U, Vᵀ, x) + # xᵀ × (-I + UVᵀ) + η = size(U, 2) if η == 0 - y .= x - return nothing + @bb @. y = -x + return y end - mul!(Ux[1:η], U, x) - mul!(y, Vᵀ[:, 1:η], Ux[1:η]) - return nothing -end - -@views function __lbroyden_matvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) - # Computes Vᵀ × U × x - size(U, 1) == 0 && return x - return Vᵀ * (U * x) + x_ = vec(x) + xᵀU_ = view(xᵀU, 1:η) + @bb xᵀU_ = transpose(U) × x_ + @bb y = transpose(Vᵀ) × xᵀU_ + @bb @. y -= x + return y end -@views function __lbroyden_rmatvec!(y::AbstractVector, xᵀVᵀ::AbstractMatrix, - U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) - # Computes xᵀ × Vᵀ × U - η = size(U, 1) +function _matvec!!(y, Vᵀx, U, Vᵀ, x) + # (-I + UVᵀ) × x + η = size(U, 2) if η == 0 - y .= x - return nothing + @bb @. y = -x + return y end - mul!(xᵀVᵀ[:, 1:η], x', Vᵀ) - mul!(reshape(y, 1, :), xᵀVᵀ[:, 1:η], U) - return nothing + x_ = vec(x) + Vᵀx_ = view(Vᵀx, 1:η) + @bb Vᵀx_ = Vᵀ × x_ + @bb y = U × Vᵀx_ + @bb @. 
y -= x + return y end -@views function __lbroyden_rmatvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) - # Computes xᵀ × Vᵀ × U - size(U, 1) == 0 && return x - return (reshape(x, 1, :) * Vᵀ) * U +@inline function __lbroyden_threshold_cache(x, ::Val{threshold}) where {threshold} + return similar(x, threshold) +end +@inline function __lbroyden_threshold_cache(x::SArray, ::Val{threshold}) where {threshold} + return zeros(SVector{threshold, eltype(x)}) end From 21e9ed4ce36598691f73250afeff73ee16484f1c Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 18:48:45 -0500 Subject: [PATCH 08/25] Nearly finished GN --- src/NonlinearSolve.jl | 34 ++++++--- src/broyden.jl | 2 +- src/gaussnewton.jl | 164 ++++++++++++----------------------------- src/jacobian.jl | 5 +- src/klement.jl | 4 +- src/lbroyden.jl | 7 +- src/levenberg.jl | 2 +- src/pseudotransient.jl | 7 +- src/raphson.jl | 2 +- src/utils.jl | 6 +- 10 files changed, 89 insertions(+), 144 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index ae1814901..63d462f18 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -51,8 +51,8 @@ isinplace(::AbstractNonlinearSolveCache{iip}) where {iip} = iip function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(cache); p = cache.p, abstol = cache.abstol, reltol = cache.reltol, - maxiters = cache.maxiters, alias_u0 = false, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} + maxiters = cache.maxiters, alias_u0 = false, termination_condition = missing, + kwargs...) where {iip} cache.p = p if iip recursivecopy!(get_u(cache), u0) @@ -63,24 +63,40 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c end reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, get_fu(cache), - get_u(cache), termination_condition) + + # Some algorithms store multiple termination caches + if hasfield(typeof(cache), :tc_cache) + # TODO: We need an efficient way to reset this upstream + tc = termination_condition === missing ? get_termination_mode(cache.tc_cache) : + termination_condition + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, get_fu(cache), + get_u(cache), tc) + cache.tc_cache = tc_cache + end + + if hasfield(typeof(cache), :ls_cache) + # TODO: A more efficient way to do this + cache.ls_cache = init_linesearch_cache(cache.prob, cache.alg.linesearch, cache.f, + get_u(cache), p, get_fu(cache), Val(iip)) + end + + hasfield(typeof(cache), :uf) && (cache.uf.p = p) cache.abstol = abstol cache.reltol = reltol - cache.tc_cache = tc_cache cache.maxiters = maxiters cache.stats.nf = 1 cache.stats.nsteps = 1 cache.force_stop = false cache.retcode = ReturnCode.Default - __reinit_internal!(cache) + __reinit_internal!(cache; u0, p, abstol, reltol, maxiters, alias_u0, + termination_condition, kwargs...) return cache end -__reinit_internal!(::AbstractNonlinearSolveCache) = nothing +__reinit_internal!(::AbstractNonlinearSolveCache; kwargs...) 
= nothing function Base.show(io::IO, alg::AbstractNonlinearSolveAlgorithm) str = "$(nameof(typeof(alg)))(" @@ -155,14 +171,14 @@ include("linesearch.jl") include("raphson.jl") # include("trustRegion.jl") # include("levenberg.jl") -# include("gaussnewton.jl") +include("gaussnewton.jl") # include("dfsane.jl") include("pseudotransient.jl") include("broyden.jl") include("klement.jl") include("lbroyden.jl") include("jacobian.jl") -# include("ad.jl") +include("ad.jl") # include("default.jl") # @setup_workload begin diff --git a/src/broyden.jl b/src/broyden.jl index 504e16912..9b165e513 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -126,7 +126,7 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} return nothing end -function __reinit_internal!(cache::GeneralBroydenCache) +function __reinit_internal!(cache::GeneralBroydenCache; kwargs...) cache.J⁻¹ = __reinit_identity_jacobian!!(cache.J⁻¹) cache.resets = 0 return nothing diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index ea1855e68..1b4fc9432 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -46,9 +46,8 @@ function set_ad(alg::GaussNewton{CJ}, ad) where {CJ} return GaussNewton{CJ}(ad, alg.linsolve, alg.precs, alg.linesearch, alg.vjp_autodiff) end -function GaussNewton(; concrete_jac = nothing, linsolve = nothing, - linesearch = nothing, precs = DEFAULT_PRECS, vjp_autodiff = nothing, - adkwargs...) +function GaussNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, + linesearch = nothing, vjp_autodiff = nothing, adkwargs...) ad = default_adargs_to_adtype(; adkwargs...) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs, linesearch, @@ -59,11 +58,11 @@ end f alg u - u_prev - fu1 - fu2 - fu_new + u_cache + fu + fu_cache du + dfu p uf linsolve @@ -92,109 +91,57 @@ function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_:: alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob - linsolve_with_JᵀJ = Val(_needs_square_A(alg, u0)) + u = __maybe_unaliased(u0, alias_u0) + fu = evaluate_f(prob, u) - u = alias_u0 ? u0 : deepcopy(u0) - fu1 = evaluate_f(prob, u) + uf, linsolve, J, fu_cache, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p, + Val(iip); linsolve_with_JᵀJ = Val(__needs_square_A(alg, u))) - if SciMLBase._unwrap_val(linsolve_with_JᵀJ) - uf, linsolve, J, fu2, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p, - Val(iip); linsolve_with_JᵀJ) - else - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, - Val(iip); linsolve_with_JᵀJ) - JᵀJ, Jᵀf = nothing, nothing - end - - abstol, reltol, tc_cache_1 = init_termination_cache(abstol, reltol, fu1, u, + abstol, reltol, tc_cache_1 = init_termination_cache(abstol, reltol, fu, u, termination_condition) - _, _, tc_cache_2 = init_termination_cache(abstol, reltol, fu1, u, termination_condition) - trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) + _, _, tc_cache_2 = init_termination_cache(abstol, reltol, fu, u, termination_condition) + trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) 
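# Sketch of the step the rewritten `perform_step!` below takes when the least-squares
# problem is solved in normal form: with residual fu and Jacobian J it solves
# (JᵀJ) δu = Jᵀ fu and then updates u ← u - α δu, with α from the line search.
# Illustrative dense version only (function name made up for this sketch); the cache
# instead reuses preallocated JᵀJ / Jᵀf buffers and a LinearSolve.jl cache.
using LinearAlgebra

function gauss_newton_direction(J, fu)
    JᵀJ = Symmetric(J' * J)
    Jᵀf = J' * fu
    return JᵀJ \ Jᵀf   # δu; the caller applies u .-= α .* δu
end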
- return GaussNewtonCache{iip}(f, alg, u, copy(u), fu1, fu2, zero(fu1), du, p, uf, + @bb u_cache = copy(u) + @bb dfu = copy(fu) + + return GaussNewtonCache{iip}(f, alg, u, u_cache, fu, fu_cache, du, dfu, p, uf, linsolve, J, JᵀJ, Jᵀf, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache_1, tc_cache_2, init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)), trace) end -function perform_step!(cache::GaussNewtonCache{true}) - @unpack u, u_prev, fu1, f, p, alg, J, JᵀJ, Jᵀf, linsolve, du = cache - jacobian!!(J, cache) - - if JᵀJ !== nothing - __update_JᵀJ!(Val{true}(), cache, :JᵀJ, J) - __update_Jᵀf!(Val{true}(), cache, :Jᵀf, :JᵀJ, J, fu1) - end - - # u = u - JᵀJ \ Jᵀfu - if cache.JᵀJ === nothing - linres = dolinsolve(alg.precs, linsolve; A = J, b = _vec(fu1), linu = _vec(du), - p, reltol = cache.abstol) - else - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(JᵀJ), b = _vec(Jᵀf), - linu = _vec(du), p, reltol = cache.abstol) - end - cache.linsolve = linres.cache - α = perform_linesearch!(cache.ls_cache, u, du) - _axpy!(-α, du, u) - f(cache.fu_new, u, p) - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), J, - cache.du, α) - - check_and_update!(cache.tc_cache_1, cache, cache.fu_new, cache.u, cache.u_prev) - if !cache.force_stop - cache.fu1 .= cache.fu_new .- cache.fu1 - check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) - end - - @. u_prev = u - cache.fu1 .= cache.fu_new - cache.stats.nf += 1 - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end - -function perform_step!(cache::GaussNewtonCache{false}) - @unpack u, u_prev, fu1, f, p, alg, linsolve = cache - +function perform_step!(cache::GaussNewtonCache{iip}) where {iip} cache.J = jacobian!!(cache.J, cache) + # Use normal form to solve the Linear Problem if cache.JᵀJ !== nothing - __update_JᵀJ!(Val{false}(), cache, :JᵀJ, cache.J) - __update_Jᵀf!(Val{false}(), cache, :Jᵀf, :JᵀJ, cache.J, fu1) - end - - # u = u - J \ fu - if linsolve === nothing - cache.du = fu1 / cache.J + __update_JᵀJ!(Val{iip}(), cache, :JᵀJ, cache.J) + __update_Jᵀf!(Val{iip}(), cache, :Jᵀf, :JᵀJ, cache.J, cache.fu1) + A, b = __maybe_symmetric(cache.JᵀJ), _vec(cache.Jᵀf) else - if cache.JᵀJ === nothing - linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), - linu = _vec(cache.du), p, reltol = cache.abstol) - else - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.JᵀJ), - b = _vec(cache.Jᵀf), linu = _vec(cache.du), p, reltol = cache.abstol) - end - cache.linsolve = linres.cache + A, b = cache.J, _vec(cache.fu) end - α = perform_linesearch!(cache.ls_cache, u, cache.du) - cache.u = @. 
u - α * cache.du # `u` might not support mutation - cache.fu_new = f(cache.u, p) - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - cache.du, α) + linres = dolinsolve(alg.precs, linsolve; A, b, linu = _vec(du), cache.p, + reltol = cache.abstol) + cache.linsolve = linres.cache + cache.du = _restructure(cache.du, linres.u) + + α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) + @bb axpy!(-α, cache.du, cache.u) + evaluate_f(cache, cache.u, cache.p) + update_trace!(cache, α) - check_and_update!(cache.tc_cache_1, cache, cache.fu_new, cache.u, cache.u_prev) + check_and_update!(cache.tc_cache_1, cache, cache.fu, cache.u, cache.u_cache) if !cache.force_stop - cache.fu1 = cache.fu_new .- cache.fu1 - check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) + @bb @. cache.dfu = cache.fu .- cache.dfu + check_and_update!(cache.tc_cache_2, cache, cache.dfu, cache.u, cache.u_prev) end - cache.u_prev = cache.u - cache.fu1 = cache.fu_new + @bb copyto!(cache.u_cache, cache.u) + @bb copyto!(cache.dfu, cache.fu) + cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 @@ -202,33 +149,16 @@ function perform_step!(cache::GaussNewtonCache{false}) return nothing end -function SciMLBase.reinit!(cache::GaussNewtonCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu1, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu1 = cache.f(cache.u, p) - end +function __reinit_internal!(cache::GaussNewtonCache; + termination_condition = get_termination_mode(cache.tc_cache_1), kwargs...) 
+ abstol, reltol, tc_cache_1 = init_termination_cache(cache.abstol, cache.reltol, + cache.fu1, cache.u, termination_condition) + _, _, tc_cache_2 = init_termination_cache(cache.abstol, cache.reltol, cache.fu1, + cache.u, termination_condition) - reset!(cache.trace) - abstol, reltol, tc_cache_1 = init_termination_cache(abstol, reltol, cache.fu1, cache.u, - termination_condition) - _, _, tc_cache_2 = init_termination_cache(abstol, reltol, cache.fu1, cache.u, - termination_condition) - - cache.abstol = abstol - cache.reltol = reltol cache.tc_cache_1 = tc_cache_1 cache.tc_cache_2 = tc_cache_2 - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache + cache.abstol = abstol + cache.reltol = reltol + return nothing end diff --git a/src/jacobian.jl b/src/jacobian.jl index 6747da1a8..2ffcdc9aa 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -113,6 +113,8 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val JᵀJ, Jᵀfu = __init_JᵀJ(J, _vec(fu), uf, u; f, vjp_autodiff = __get_nonsparse_ad(__getproperty(alg, Val(:vjp_autodiff))), jvp_autodiff = __get_nonsparse_ad(alg.ad)) + else + JᵀJ, Jᵀfu = nothing, nothing end if linsolve_init @@ -127,8 +129,7 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val linsolve = nothing end - needsJᵀJ && return uf, linsolve, J, fu, jac_cache, du, JᵀJ, Jᵀfu - return uf, linsolve, J, fu, jac_cache, du + return uf, linsolve, J, fu, jac_cache, du, JᵀJ, Jᵀfu end ## Special Handling for Scalars diff --git a/src/klement.jl b/src/klement.jl index 37d6a6c07..cceb51c3c 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -126,7 +126,7 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} linres = dolinsolve(alg.precs, cache.linsolve; A, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - !iip && (cache.du = linres.u) + cache.du = _restructure(cache.du, linres.u) # Line Search α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) @@ -163,7 +163,7 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} return nothing end -function __reinit_internal!(cache::GeneralKlementCache) +function __reinit_internal!(cache::GeneralKlementCache; kwargs...) cache.J = __reinit_identity_jacobian!!(cache.J) cache.resets = 0 return nothing diff --git a/src/lbroyden.jl b/src/lbroyden.jl index 8882a2645..611e5511b 100644 --- a/src/lbroyden.jl +++ b/src/lbroyden.jl @@ -73,7 +73,6 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemory @unpack f, u0, p = prob threshold = __get_threshold(alg) η = min(__get_unwrapped_threshold(alg), maxiters) - if u0 isa Number || length(u0) ≤ η # If u is a number or very small problem then we simply use Broyden return SciMLBase.__init(prob, @@ -81,13 +80,11 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemory alias_u0, maxiters, abstol, internalnorm, kwargs...) end u = __maybe_unaliased(u0, alias_u0) - fu = evaluate_f(prob, u) - U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold) @bb du = copy(fu) - @bb u_cache = similar(u) + @bb u_cache = copy(u) @bb fu_cache = copy(fu) @bb dfu = similar(fu) @bb vᵀ_cache = similar(u) @@ -176,7 +173,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{iip}) where {iip} return nothing end -function __reinit_internal!(cache::LimitedMemoryBroydenCache) +function __reinit_internal!(cache::LimitedMemoryBroydenCache; kwargs...) 
cache.iterations_since_reset = 0 return nothing end diff --git a/src/levenberg.jl b/src/levenberg.jl index 94e882223..3b523807c 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -173,7 +173,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, u = alias_u0 ? u0 : deepcopy(u0) fu1 = evaluate_f(prob, u) - linsolve_with_JᵀJ = Val(_needs_square_A(alg, u0)) + linsolve_with_JᵀJ = Val(__needs_square_A(alg, u0)) if _unwrap_val(linsolve_with_JᵀJ) uf, linsolve, J, fu2, jac_cache, du, JᵀJ, v = jacobian_caches(alg, f, u, p, diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index c6f1926f1..b01762493 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -130,7 +130,7 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} linres = dolinsolve(alg.precs, cache.linsolve; A, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - !iip && (cache.du = linres.u) + cache.du = _restructure(cache.du, linres.u) @bb axpy!(-true, cache.du, cache.u) @@ -152,8 +152,9 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} return nothing end -function __reinit_internal!(cache::PseudoTransientCache) - cache.alpha = convert(eltype(cache.u), cache.alg.alpha_initial) +function __reinit_internal!(cache::PseudoTransientCache; alpha = cache.alg.alpha_initial, + kwargs...) + cache.alpha = convert(eltype(cache.u), alpha) cache.res_norm = cache.internalnorm(cache.fu) return nothing end diff --git a/src/raphson.jl b/src/raphson.jl index 835fadd48..ac40b7c64 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -107,7 +107,7 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - !iip && (cache.du = linres.u) + cache.du = _restructure(cache.du, linres.u) # Line Search α = perform_linesearch!(cache.ls_cache, cache.u, cache.du) diff --git a/src/utils.jl b/src/utils.jl index c6b670f8b..46c5b9295 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -379,9 +379,9 @@ __try_factorize_and_check_singular!(::FakeLinearSolveJLCache, x) = _issingular(x end # Non-square matrix -@inline _needs_square_A(_, ::Number) = true -@inline _needs_square_A(_, ::StaticArray) = true -@inline _needs_square_A(alg, _) = LinearSolve.needs_square_A(alg.linsolve) +@inline __needs_square_A(_, ::Number) = true +# @inline __needs_square_A(_, ::StaticArray) = true +@inline __needs_square_A(alg, _) = LinearSolve.needs_square_A(alg.linsolve) # Define special concatenation for certain Array combinations @inline _vcat(x, y) = vcat(x, y) From 0e3efd72170b801c6f50e226435c0816b9c56aff Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 19:32:32 -0500 Subject: [PATCH 09/25] Fix GN --- src/gaussnewton.jl | 10 +++++----- src/utils.jl | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index 1b4fc9432..5ff01d79a 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -108,7 +108,7 @@ function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_:: return GaussNewtonCache{iip}(f, alg, u, u_cache, fu, fu_cache, du, dfu, p, uf, linsolve, J, JᵀJ, Jᵀf, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache_1, tc_cache_2, - init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)), trace) + 
init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), trace) end function perform_step!(cache::GaussNewtonCache{iip}) where {iip} @@ -117,14 +117,14 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} # Use normal form to solve the Linear Problem if cache.JᵀJ !== nothing __update_JᵀJ!(Val{iip}(), cache, :JᵀJ, cache.J) - __update_Jᵀf!(Val{iip}(), cache, :Jᵀf, :JᵀJ, cache.J, cache.fu1) + __update_Jᵀf!(Val{iip}(), cache, :Jᵀf, :JᵀJ, cache.J, cache.fu) A, b = __maybe_symmetric(cache.JᵀJ), _vec(cache.Jᵀf) else A, b = cache.J, _vec(cache.fu) end - linres = dolinsolve(alg.precs, linsolve; A, b, linu = _vec(du), cache.p, - reltol = cache.abstol) + linres = dolinsolve(cache.alg.precs, cache.linsolve; A, b, linu = _vec(cache.du), + cache.p, reltol = cache.abstol) cache.linsolve = linres.cache cache.du = _restructure(cache.du, linres.u) @@ -136,7 +136,7 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} check_and_update!(cache.tc_cache_1, cache, cache.fu, cache.u, cache.u_cache) if !cache.force_stop @bb @. cache.dfu = cache.fu .- cache.dfu - check_and_update!(cache.tc_cache_2, cache, cache.dfu, cache.u, cache.u_prev) + check_and_update!(cache.tc_cache_2, cache, cache.dfu, cache.u, cache.u_cache) end @bb copyto!(cache.u_cache, cache.u) diff --git a/src/utils.jl b/src/utils.jl index 46c5b9295..00b7d3726 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -188,6 +188,15 @@ function evaluate_f(prob::Union{NonlinearProblem{uType, iip}, return fu end +function evaluate_f(f, u, p, ::Val{iip}; fu = nothing) where {iip} + if iip + f(fu, u, p) + return fu + else + return f(u, p) + end +end + function evaluate_f(cache, u, p) if isinplace(cache) cache.prob.f(get_fu(cache), u, p) From 031639f1a663bdca9b417dc376bea80f09277656 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 20:11:28 -0500 Subject: [PATCH 10/25] Fix DFSane --- src/NonlinearSolve.jl | 2 +- src/dfsane.jl | 229 ++++++++++++------------------------------ src/utils.jl | 15 --- 3 files changed, 63 insertions(+), 183 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 63d462f18..dacc98910 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -172,7 +172,7 @@ include("raphson.jl") # include("trustRegion.jl") # include("levenberg.jl") include("gaussnewton.jl") -# include("dfsane.jl") +include("dfsane.jl") include("pseudotransient.jl") include("broyden.jl") include("klement.jl") diff --git a/src/dfsane.jl b/src/dfsane.jl index 4b31ff9f3..8dcb1e9ff 100644 --- a/src/dfsane.jl +++ b/src/dfsane.jl @@ -1,8 +1,8 @@ """ - DFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0, - M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5, - n_exp::Int = 2, η_strategy::Function = (fn_1, n, x_n, f_n) -> fn_1 / n^2, - max_inner_iterations::Int = 1000) + DFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0, M::Int = 10, + γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5, n_exp::Int = 2, + η_strategy::Function = (fn_1, n, x_n, f_n) -> fn_1 / n^2, + max_inner_iterations::Int = 100) A low-overhead and allocation-free implementation of the df-sane method for solving large-scale nonlinear systems of equations. For in depth information about all the parameters and the algorithm, @@ -39,34 +39,27 @@ Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_ `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to ``fn_1 / n^2``. 
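  For instance, a minimal call that keeps the default spectral parameters but passes an
  explicit `η_strategy` looks like the following sketch (any standard `NonlinearProblem`
  works here):

      using NonlinearSolve
      prob = NonlinearProblem((u, p) -> u .* u .- p, [0.1, 0.3], 2.0)
      alg = DFSane(; η_strategy = (fn_1, n, x_n, f_n) -> fn_1 / n^2)
      sol = solve(prob, alg)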
- `max_inner_iterations`: the maximum number of iterations allowed for the inner loop of the - algorithm. Defaults to `1000`. + algorithm. Defaults to `100`. """ -@concrete struct DFSane <: AbstractNonlinearSolveAlgorithm - σ_min - σ_max - σ_1 - M::Int - γ - τ_min - τ_max - n_exp::Int - η_strategy - max_inner_iterations::Int -end - -function DFSane(; σ_min = 1e-10, σ_max = 1e+10, σ_1 = 1.0, M = 10, γ = 1e-4, τ_min = 0.1, - τ_max = 0.5, n_exp = 2, η_strategy::F = (fn_1, n, x_n, f_n) -> fn_1 / n^2, - max_inner_iterations = 1000) where {F} - return DFSane(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, n_exp, η_strategy, - max_inner_iterations) +@kwdef @concrete struct DFSane <: AbstractNonlinearSolveAlgorithm + σ_min = 1e-10 + σ_max = 1e10 + σ_1 = 1.0 + M::Int = 10 + γ = 1e-4 + τ_min = 0.1 + τ_max = 0.5 + n_exp::Int = 2 + η_strategy = (fn_1, n, x_n, f_n) -> fn_1 / n^2 + max_inner_iterations::Int = 100 end @concrete mutable struct DFSaneCache{iip} <: AbstractNonlinearSolveCache{iip} alg u - uprev + u_cache fu - fuprev + fu_cache du history f_norm @@ -93,36 +86,35 @@ end trace end -get_fu(cache::DFSaneCache) = cache.fu -set_fu!(cache::DFSaneCache, fu) = (cache.fu = fu) - function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm::F = DEFAULT_NORM, kwargs...) where {uType, iip, F} - u = alias_u0 ? prob.u0 : deepcopy(prob.u0) + u = __maybe_unaliased(prob.u0, alias_u0) T = eltype(u) - du, uprev = copy(u), copy(u) + @bb du = similar(u) + @bb u_cache = copy(u) + fu = evaluate_f(prob, u) - fuprev = copy(fu) + @bb fu_cache = copy(fu) f_norm = internalnorm(fu)^alg.n_exp f_norm_0 = f_norm history = fill(f_norm, alg.M) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, uprev, + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u_cache, termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, nothing, du; kwargs...) - return DFSaneCache{iip}(alg, u, uprev, fu, fuprev, du, history, f_norm, f_norm_0, alg.M, - T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min), + return DFSaneCache{iip}(alg, u, u_cache, fu, fu_cache, du, history, f_norm, f_norm_0, + alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min), T(alg.τ_max), alg.n_exp, prob.p, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) end -function perform_step!(cache::DFSaneCache{true}) +function perform_step!(cache::DFSaneCache{iip}) where {iip} @unpack alg, f_norm, σ_n, σ_min, σ_max, α_1, γ, τ_min, τ_max, n_exp, M, prob = cache T = eltype(cache.u) f_norm_old = f_norm @@ -131,128 +123,64 @@ function perform_step!(cache::DFSaneCache{true}) σ_n = sign(σ_n) * clamp(abs(σ_n), σ_min, σ_max) # Line search direction - @. cache.du = -σ_n * cache.fuprev + @bb @. cache.du = -σ_n * cache.fu η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps, cache.u, cache.fu) f_bar = maximum(cache.history) α₊ = α_1 α₋ = α_1 - _axpy!(α₊, cache.du, cache.u) - - prob.f(cache.fu, cache.u, cache.p) - f_norm = cache.internalnorm(cache.fu)^n_exp - - # TODO: Failure mode with inner line search failed? - for _ in 1:(cache.alg.max_inner_iterations) - c = f_bar + η - γ * α₊^2 * f_norm_old - - f_norm ≤ c && break - - α₊ = α₊ * clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), - τ_min, τ_max) - @. 
cache.u = cache.uprev - α₋ * cache.du - - prob.f(cache.fu, cache.u, cache.p) - f_norm = cache.internalnorm(cache.fu)^n_exp - - f_norm ≤ c && break - - α₋ = α₋ * clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), - τ_min, τ_max) - @. cache.u = cache.uprev + α₊ * cache.du - - prob.f(cache.fu, cache.u, cache.p) - f_norm = cache.internalnorm(cache.fu)^n_exp - end - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), nothing, - cache.du, α₊) - check_and_update!(cache, cache.fu, cache.u, cache.uprev) + @bb axpy!(α₊, cache.du, cache.u) - # Update spectral parameter - @. cache.uprev = cache.u - cache.uprev - @. cache.fuprev = cache.fu - cache.fuprev - - α₊ = sum(abs2, cache.uprev) - @. cache.uprev *= cache.fuprev - α₋ = sum(cache.uprev) - cache.σ_n = α₊ / α₋ - - # Spectral parameter bounds check - if !(σ_min ≤ abs(cache.σ_n) ≤ σ_max) - test_norm = sqrt(sum(abs2, cache.fuprev)) - cache.σ_n = clamp(inv(test_norm), T(1), T(1e5)) - end - - # Take step - @. cache.uprev = cache.u - @. cache.fuprev = cache.fu - cache.f_norm = f_norm - - # Update history - cache.history[cache.stats.nsteps % M + 1] = f_norm - cache.stats.nf += 1 - return nothing -end - -function perform_step!(cache::DFSaneCache{false}) - @unpack alg, f_norm, σ_n, σ_min, σ_max, α_1, γ, τ_min, τ_max, n_exp, M, prob = cache - T = eltype(cache.u) - f_norm_old = f_norm - - # Spectral parameter range check - σ_n = sign(σ_n) * clamp(abs(σ_n), σ_min, σ_max) - - # Line search direction - cache.du = @. -σ_n * cache.fuprev - - η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps, cache.u, cache.fu) - - f_bar = maximum(cache.history) - α₊ = α_1 - α₋ = α_1 - cache.u = @. cache.uprev + α₊ * cache.du - - cache.fu = prob.f(cache.u, cache.p) + evaluate_f(cache, cache.u, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp + α = α₊ - # TODO: Failure mode with inner line search failed? - for _ in 1:(cache.alg.max_inner_iterations) - c = f_bar + η - γ * α₊^2 * f_norm_old - - f_norm ≤ c && break + inner_converged = false + for k in 1:(cache.alg.max_inner_iterations) + if f_norm ≤ f_bar + η - γ * α₊^2 * f_norm_old + α = α₊ + inner_converged = true + break + end α₊ = α₊ * clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), τ_min, τ_max) - cache.u = @. cache.uprev - α₋ * cache.du + @bb axpy!(-α₋, cache.du, cache.u) - cache.fu = prob.f(cache.u, cache.p) + evaluate_f(cache, cache.u, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp - f_norm ≤ c && break + if f_norm ≤ f_bar + η - γ * α₋^2 * f_norm_old + α = α₋ + inner_converged = true + break + end α₋ = α₋ * clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), τ_min, τ_max) - cache.u = @. cache.uprev + α₊ * cache.du + @bb axpy!(α₊, cache.du, cache.u) - cache.fu = prob.f(cache.u, cache.p) + evaluate_f(cache, cache.u, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp end - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), nothing, - cache.du, α₊) + if !inner_converged + cache.retcode = ReturnCode.ConvergenceFailure + cache.force_stop = true + end - check_and_update!(cache, cache.fu, cache.u, cache.uprev) + update_trace!(cache, α) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) # Update spectral parameter - cache.uprev = @. cache.u - cache.uprev - cache.fuprev = @. cache.fu - cache.fuprev + @bb @. cache.u_cache = cache.u - cache.u_cache + @bb @. cache.fu_cache = cache.fu - cache.fu_cache - α₊ = sum(abs2, cache.uprev) - cache.uprev = @. 
cache.uprev * cache.fuprev - α₋ = sum(cache.uprev) + α₊ = sum(abs2, cache.u_cache) + @bb @. cache.u_cache *= cache.fu_cache + α₋ = sum(cache.u_cache) cache.σ_n = α₊ / α₋ # Spectral parameter bounds check @@ -262,8 +190,8 @@ function perform_step!(cache::DFSaneCache{false}) end # Take step - cache.uprev = cache.u - cache.fuprev = cache.fu + @bb copyto!(cache.u_cache, cache.u) + @bb copyto!(cache.fu_cache, cache.fu) cache.f_norm = f_norm # Update history @@ -272,41 +200,8 @@ function perform_step!(cache::DFSaneCache{false}) return nothing end -function SciMLBase.reinit!(cache::DFSaneCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - recursivecopy!(cache.uprev, u0) - cache.prob.f(cache.fu, cache.u, p) - cache.prob.f(cache.fuprev, cache.uprev, p) - else - cache.u = u0 - cache.uprev = u0 - cache.fu = cache.prob.f(cache.u, p) - cache.fuprev = cache.prob.f(cache.uprev, p) - end - +function __reinit_internal!(cache::DFSaneCache; kwargs...) cache.f_norm = cache.internalnorm(cache.fu)^cache.n_exp cache.f_norm_0 = cache.f_norm - - fill!(cache.history, cache.f_norm) - - T = eltype(cache.u) - cache.σ_n = T(cache.alg.σ_1) - - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu, cache.u, - termination_condition) - - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - return cache + return end diff --git a/src/utils.jl b/src/utils.jl index 00b7d3726..90b882af3 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -206,14 +206,6 @@ function evaluate_f(cache, u, p) return nothing end -""" - __matmul!(C, A, B) - -Defaults to `mul!(C, A, B)`. However, for sparse matrices uses `C .= A * B`. -""" -__matmul!(C, A, B) = mul!(C, A, B) -__matmul!(C::AbstractSparseMatrix, A, B) = C .= A * B - # Concretize Algorithms function get_concrete_algorithm(alg, prob) !hasfield(typeof(alg), :ad) && return alg @@ -381,15 +373,8 @@ function __try_factorize_and_check_singular!(linsolve, X) end __try_factorize_and_check_singular!(::FakeLinearSolveJLCache, x) = _issingular(x), false -# TODO: Remove. handled in MaybeInplace.jl -@generated function _axpy!(α, x, y) - hasmethod(axpy!, Tuple{α, x, y}) && return :(axpy!(α, x, y)) - return :(@. 
y += α * x) -end - # Non-square matrix @inline __needs_square_A(_, ::Number) = true -# @inline __needs_square_A(_, ::StaticArray) = true @inline __needs_square_A(alg, _) = LinearSolve.needs_square_A(alg.linsolve) # Define special concatenation for certain Array combinations From f18fe152ad348c8f961c8da4183dbb5ed4cc627c Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 20:44:06 -0500 Subject: [PATCH 11/25] Start cleaning up TrustRegion --- src/jacobian.jl | 1 + src/trustRegion.jl | 171 ++++++++++++++------------------------------- src/utils.jl | 21 ++++-- 3 files changed, 68 insertions(+), 125 deletions(-) diff --git a/src/jacobian.jl b/src/jacobian.jl index 2ffcdc9aa..83b17f834 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -209,6 +209,7 @@ function __concrete_vjp_autodiff(vjp_autodiff, jvp_autodiff, uf) end # Generic Handling of Krylov Methods for Normal Form Linear Solves +# FIXME: Use MaybeInplace here for efficient matmuls function __update_JᵀJ!(iip::Val, cache, sym::Symbol, J) return __update_JᵀJ!(iip, cache, sym, getproperty(cache, sym), J) end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 5493aa4d7..3cd40c907 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -1,5 +1,5 @@ """ -`RadiusUpdateSchemes` + RadiusUpdateSchemes `RadiusUpdateSchemes` is the standard enum interface for different types of radius update schemes implemented in the Trust Region method. These schemes specify how the radius of the so-called trust region @@ -16,7 +16,7 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: """ @enumx RadiusUpdateSchemes begin """ - `RadiusUpdateSchemes.Simple` + RadiusUpdateSchemes.Simple The simple or conventional radius update scheme. This scheme is chosen by default and follows the conventional approach to update the trust region radius, i.e. if the @@ -26,21 +26,21 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: Simple """ - `RadiusUpdateSchemes.NLsolve` + RadiusUpdateSchemes.NLsolve The same updating scheme as in NLsolve's (https://github.com/JuliaNLSolvers/NLsolve.jl) trust region dogleg implementation. """ NLsolve """ - `RadiusUpdateSchemes.NocedalWright` + RadiusUpdateSchemes.NocedalWright Trust region updating scheme as in Nocedal and Wright [see Alg 11.5, page 291]. """ NocedalWright """ - `RadiusUpdateSchemes.Hei` + RadiusUpdateSchemes.Hei This scheme is proposed by [Hei, L.] (https://www.jstor.org/stable/43693061). The trust region radius depends on the size (norm) of the current step size. The hypothesis is to let the radius converge to zero @@ -50,7 +50,7 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: Hei """ - `RadiusUpdateSchemes.Yuan` + RadiusUpdateSchemes.Yuan This scheme is proposed by [Yuan, Y.] (https://www.researchgate.net/publication/249011466_A_new_trust_region_algorithm_with_trust_region_radius_converging_to_zero). Similar to Hei's scheme, the trust region is updated in a way so that it converges to zero, however here, @@ -60,7 +60,7 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: Yuan """ - `RadiusUpdateSchemes.Bastin` + RadiusUpdateSchemes.Bastin This scheme is proposed by [Bastin, et al.] (https://www.researchgate.net/publication/225100660_A_retrospective_trust-region_method_for_unconstrained_optimization). The scheme is called a retrospective update scheme as it uses the model function at the current @@ -71,7 +71,7 @@ states as `RadiusUpdateSchemes.T`. 
Simply put the desired scheme as follows: Bastin """ - `RadiusUpdateSchemes.Fan` + RadiusUpdateSchemes.Fan This scheme is proposed by [Fan, J.] (https://link.springer.com/article/10.1007/s10589-005-3078-8). It is very much similar to Hei's and Yuan's schemes as it lets the trust region radius depend on the current size (norm) of the objective (merit) @@ -170,7 +170,7 @@ function set_ad(alg::TrustRegion{CJ}, ad) where {CJ} end function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, - radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update + radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1, step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4, expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4, @@ -233,6 +233,7 @@ end trace end +# TODO: add J_cache function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, termination_condition = nothing, internalnorm = DEFAULT_NORM, linsolve_kwargs = (;), @@ -244,7 +245,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, fu1 = evaluate_f(prob, u) fu_prev = zero(fu1) - loss = get_loss(fu1) + loss = __get_trust_region_loss(fu1) uf, _, J, fu2, jac_cache, du, H, g = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(true), lininit = Val(false)) g = _restructure(fu1, g) @@ -350,64 +351,30 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, p1, p2, p3, p4, ϵ, NLStats(1, 0, 0, 0, 0), tc_cache, trace) end -function perform_step!(cache::TrustRegionCache{true}) - @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache +function perform_step!(cache::TrustRegionCache{iip}) where {iip} if cache.make_new_J - jacobian!!(J, cache) - __update_JᵀJ!(Val{true}(), cache, :H, J) - __update_Jᵀf!(Val{true}(), cache, :g, :H, J, _vec(fu)) + cache.J = jacobian!!(cache.J, cache) + + __update_JᵀJ!(Val{iip}(), cache, :H, cache.J) + __update_Jᵀf!(Val{iip}(), cache, :g, :H, cache.J, _vec(cache.fu)) cache.stats.njacs += 1 # do not use A = cache.H, b = _vec(cache.g) since it is equivalent # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular - linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), - linu = _vec(u_gauss_newton), p = p, reltol = cache.abstol) + linres = dolinsolve(cache.alg.precs, cache.linsolve, A = cache.J, + b = _vec(cache.fu), linu = _vec(cache.u_gauss_newton), p = cache.p, + reltol = cache.abstol) cache.linsolve = linres.cache - @. cache.u_gauss_newton = -1 * u_gauss_newton - end - - # Compute dogleg step - dogleg!(cache) - - # Compute the potentially new u - @. 
cache.u_tmp = u + cache.du - f(cache.fu_new, cache.u_tmp, p) - trust_region_step!(cache) - cache.stats.nf += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end - -function perform_step!(cache::TrustRegionCache{false}) - @unpack make_new_J, fu, f, u, p = cache - - if make_new_J - J = jacobian!!(cache.J, cache) - __update_JᵀJ!(Val{false}(), cache, :H, J) - __update_Jᵀf!(Val{false}(), cache, :g, :H, J, _vec(fu)) - cache.stats.njacs += 1 - - if cache.linsolve === nothing - # Scalar - cache.u_gauss_newton = -cache.H \ cache.g - else - # do not use A = cache.H, b = _vec(cache.g) since it is equivalent - # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular - linres = dolinsolve(cache.alg.precs, cache.linsolve, A = cache.J, b = _vec(fu), - linu = _vec(cache.u_gauss_newton), p = p, reltol = cache.abstol) - cache.linsolve = linres.cache - @. cache.u_gauss_newton *= -1 - end + cache.u_gauss_newton = _restructure(cache.u_gauss_newton, linres.u) + @bb @. cache.u_gauss_newton *= -1 end - # Compute the Newton step. + # compute dogleg step dogleg!(cache) - # Compute the potentially new u - cache.u_tmp = u + cache.du - - cache.fu_new = f(cache.u_tmp, p) + # compute the potentially new u + @bb @. cache.u_cache_2 = cache.u + cache.du + evaluate_f(cache, cache.u_tmp, cache.p, Val{:fu_cache_2}()) trust_region_step!(cache) cache.stats.nf += 1 cache.stats.nsolve += 1 @@ -415,27 +382,23 @@ function perform_step!(cache::TrustRegionCache{false}) return nothing end -function retrospective_step!(cache::TrustRegionCache) - @unpack J, fu_prev, fu, u_prev, u = cache - J = jacobian!!(deepcopy(J), cache) - if J isa Number - cache.H = J' * J - cache.g = J' * fu - else - __update_JᵀJ!(Val{isinplace(cache)}(), cache, :H, J) - __update_Jᵀf!(Val{isinplace(cache)}(), cache, :g, :H, J, fu) - end +function retrospective_step!(cache::TrustRegionCache{iip}) where {iip} + J = jacobian!!(cache.J_cache, cache) + __update_JᵀJ!(Val{iip}(), cache, :H, J) + __update_Jᵀf!(Val{iip}(), cache, :g, :H, J, cache.fu) cache.stats.njacs += 1 - @unpack H, g, du = cache - return -(get_loss(fu_prev) - get_loss(fu)) / - (dot(_vec(du), _vec(g)) + __lr_mul(Val(isinplace(cache)), H, _vec(du)) / 2) + # FIXME: Caching in __lr_mul + num = __get_trust_region_loss(cache.fu) - __get_trust_region_loss(cache.fu_cache) + denom = dot(_vec(du), _vec(g)) + __lr_mul(Val{iip}(), H, _vec(du)) / 2 + return num / denom end +# TODO function trust_region_step!(cache::TrustRegionCache) @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache - cache.loss_new = get_loss(fu_new) + cache.loss_new = __get_trust_region_loss(fu_new) # Compute the ratio of the actual reduction to the predicted reduction. cache.r = -(loss - cache.loss_new) / @@ -556,6 +519,7 @@ function trust_region_step!(cache::TrustRegionCache) end @unpack p1 = cache + # TODO: Use the `vjp_autodiff` to for the jvp cache.trust_r = p1 * cache.internalnorm(jvp!(cache)) update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, @@ -608,6 +572,7 @@ function trust_region_step!(cache::TrustRegionCache) end end +# TODO function dogleg!(cache::TrustRegionCache{true}) @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache @@ -638,6 +603,7 @@ function dogleg!(cache::TrustRegionCache{true}) @. 
cache.du = u_cauchy + τ * u_tmp end +# TODO function dogleg!(cache::TrustRegionCache{false}) @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache @@ -667,20 +633,14 @@ function dogleg!(cache::TrustRegionCache{false}) cache.du = u_cauchy + τ * u_tmp end -function take_step!(cache::TrustRegionCache{true}) - cache.u_prev .= cache.u - cache.u .= cache.u_tmp - cache.fu_prev .= cache.fu - cache.fu .= cache.fu_new -end - -function take_step!(cache::TrustRegionCache{false}) - cache.u_prev = cache.u - cache.u = cache.u_tmp - cache.fu_prev = cache.fu - cache.fu = cache.fu_new +function __take_step!(cache::TrustRegionCache) + @bb copyto!(cache.u_cache, cache.u) + @bb copyto!(cache.u, cache.u_cache_2) # u_tmp --> u_cache_2 + @bb copyto!(cache.fu_cache, cache.fu) + @bb copyto!(cache.fu, cache.fu_cache_2) # fu_new --> fu_cache_2 end +# TODO function jvp!(cache::TrustRegionCache{false}) @unpack f, u, fu, uf = cache if isa(u, Number) @@ -710,40 +670,15 @@ function not_terminated(cache::TrustRegionCache) end return true end -get_fu(cache::TrustRegionCache) = cache.fu -set_fu!(cache::TrustRegionCache, fu) = (cache.fu = fu) - -function SciMLBase.reinit!(cache::TrustRegionCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters, - termination_condition = get_termination_mode(cache.tc_cache)) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu = cache.f(cache.u, p) - end - - reset!(cache.trace) - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu, cache.u, - termination_condition) - cache.abstol = abstol - cache.reltol = reltol - cache.tc_cache = tc_cache - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - cache.make_new_J = true - cache.loss = get_loss(cache.fu) +function __reinit_internal!(cache::TrustRegionCache; kwargs...) 
+ cache.loss = __get_trust_region_loss(cache.fu) cache.shrink_counter = 0 - cache.trust_r = convert(eltype(cache.u), cache.alg.initial_trust_radius) - if iszero(cache.trust_r) - cache.trust_r = convert(eltype(cache.u), cache.max_trust_r / 11) - end - return cache + cache.trust_r = convert(eltype(cache.u), + ifelse(cache.alg.initial_trust_radius == 0, cache.alg.initial_trust_radius, + cache.max_trust_r / 11)) + cache.make_new_J = true + return nothing end + +__get_trust_region_loss(fu) = norm(fu)^2 / 2 diff --git a/src/utils.jl b/src/utils.jl index 90b882af3..0267de434 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -151,8 +151,6 @@ function wrapprecs(_Pl, _Pr, weight) return Pl, Pr end -get_loss(fu) = norm(fu)^2 / 2 - function rfunc(r::R, c2::R, M::R, γ1::R, γ2::R, β::R) where {R <: Real} # R-function for adaptive trust region method if (r ≥ c2) return (2 * (M - 1 - γ2) * atan(r - c2) + (1 + γ2)) / π @@ -188,7 +186,7 @@ function evaluate_f(prob::Union{NonlinearProblem{uType, iip}, return fu end -function evaluate_f(f, u, p, ::Val{iip}; fu = nothing) where {iip} +function evaluate_f(f::F, u, p, ::Val{iip}; fu = nothing) where {F, iip <: Bool} if iip f(fu, u, p) return fu @@ -197,11 +195,20 @@ function evaluate_f(f, u, p, ::Val{iip}; fu = nothing) where {iip} end end -function evaluate_f(cache, u, p) - if isinplace(cache) - cache.prob.f(get_fu(cache), u, p) +function evaluate_f(cache::AbstractNonlinearSolveCache, u, p, + fu_sym::Val{FUSYM} = Val(nothing)) where {FUSYM} + if FUSYM === nothing + if isinplace(cache) + cache.prob.f(get_fu(cache), u, p) + else + set_fu!(cache, cache.prob.f(u, p)) + end else - set_fu!(cache, cache.prob.f(u, p)) + if isinplace(cache) + cache.prob.f(__getproperty(cache, fu_sym), u, p) + else + setproperty!(cache, FUSYM, cache.prob.f(u, p)) + end end return nothing end From eadf16ff89c5ed2df1ecc9249cf43d81bedf6fb5 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 22:38:51 -0500 Subject: [PATCH 12/25] Fix some correctness issues --- src/broyden.jl | 3 +-- src/dfsane.jl | 44 +++++++++++++++++----------------------- src/gaussnewton.jl | 1 - src/klement.jl | 13 ++++++------ src/lbroyden.jl | 1 - src/levenberg.jl | 2 -- src/linesearch.jl | 2 +- src/pseudotransient.jl | 1 - src/raphson.jl | 1 - src/trustRegion.jl | 1 - src/utils.jl | 1 + test/23_test_problems.jl | 7 ++++--- 12 files changed, 32 insertions(+), 45 deletions(-) diff --git a/src/broyden.jl b/src/broyden.jl index 9b165e513..8b271d16c 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -96,7 +96,6 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} update_trace!(cache, α) check_and_update!(cache, cache.fu, cache.u, cache.u_cache) - cache.stats.nf += 1 cache.force_stop && return nothing @@ -114,7 +113,7 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} else @bb cache.du .*= -1 @bb cache.J⁻¹dfu = cache.J⁻¹ × vec(cache.dfu) - @bb cache.u_cache = cache.J⁻¹ × vec(cache.du) + @bb cache.u_cache = transpose(cache.J⁻¹) × vec(cache.du) denom = dot(cache.du, cache.J⁻¹dfu) @bb @. cache.du = (cache.du - cache.J⁻¹dfu) / ifelse(iszero(denom), T(1e-5), denom) @bb cache.J⁻¹ += vec(cache.du) × transpose(cache.u_cache) diff --git a/src/dfsane.jl b/src/dfsane.jl index 8dcb1e9ff..570dd7ccd 100644 --- a/src/dfsane.jl +++ b/src/dfsane.jl @@ -58,6 +58,7 @@ end alg u u_cache + u_cache_2 fu fu_cache du @@ -95,6 +96,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args. 
@bb du = similar(u) @bb u_cache = copy(u) + @bb u_cache_2 = similar(u) fu = evaluate_f(prob, u) @bb fu_cache = copy(fu) @@ -108,10 +110,10 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args. termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, nothing, du; kwargs...) - return DFSaneCache{iip}(alg, u, u_cache, fu, fu_cache, du, history, f_norm, f_norm_0, - alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min), - T(alg.τ_max), alg.n_exp, prob.p, false, maxiters, internalnorm, ReturnCode.Default, - abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) + return DFSaneCache{iip}(alg, u, u_cache, u_cache_2, fu, fu_cache, du, history, f_norm, + f_norm_0, alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), + T(alg.τ_min), T(alg.τ_max), alg.n_exp, prob.p, false, maxiters, internalnorm, + ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) end function perform_step!(cache::DFSaneCache{iip}) where {iip} @@ -119,37 +121,32 @@ function perform_step!(cache::DFSaneCache{iip}) where {iip} T = eltype(cache.u) f_norm_old = f_norm - # Spectral parameter range check - σ_n = sign(σ_n) * clamp(abs(σ_n), σ_min, σ_max) - # Line search direction @bb @. cache.du = -σ_n * cache.fu - η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps, cache.u, cache.fu) + η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps + 1, cache.u, cache.fu) f_bar = maximum(cache.history) α₊ = α_1 α₋ = α_1 - @bb axpy!(α₊, cache.du, cache.u) - - evaluate_f(cache, cache.u, cache.p) + @bb @. cache.u_cache_2 = cache.u + α₊ * cache.du + evaluate_f(cache, cache.u_cache_2, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp - α = α₊ + α = -α₊ inner_converged = false for k in 1:(cache.alg.max_inner_iterations) if f_norm ≤ f_bar + η - γ * α₊^2 * f_norm_old - α = α₊ + α = -α₊ inner_converged = true break end α₊ = α₊ * clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), τ_min, τ_max) - @bb axpy!(-α₋, cache.du, cache.u) - - evaluate_f(cache, cache.u, cache.p) + @bb @. cache.u_cache_2 = cache.u - α₋ * cache.du + evaluate_f(cache, cache.u_cache_2, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp if f_norm ≤ f_bar + η - γ * α₋^2 * f_norm_old @@ -160,9 +157,8 @@ function perform_step!(cache::DFSaneCache{iip}) where {iip} α₋ = α₋ * clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), τ_min, τ_max) - @bb axpy!(α₊, cache.du, cache.u) - - evaluate_f(cache, cache.u, cache.p) + @bb @. cache.u_cache_2 = cache.u + α₊ * cache.du + evaluate_f(cache, cache.u_cache_2, cache.p) f_norm = cache.internalnorm(cache.fu)^n_exp end @@ -171,6 +167,8 @@ function perform_step!(cache::DFSaneCache{iip}) where {iip} cache.force_stop = true end + @bb copyto!(cache.u, cache.u_cache_2) + update_trace!(cache, α) check_and_update!(cache, cache.fu, cache.u, cache.u_cache) @@ -178,14 +176,11 @@ function perform_step!(cache::DFSaneCache{iip}) where {iip} @bb @. cache.u_cache = cache.u - cache.u_cache @bb @. cache.fu_cache = cache.fu - cache.fu_cache - α₊ = sum(abs2, cache.u_cache) - @bb @. 
cache.u_cache *= cache.fu_cache - α₋ = sum(cache.u_cache) - cache.σ_n = α₊ / α₋ + cache.σ_n = dot(cache.u_cache, cache.u_cache) / dot(cache.fu_cache, cache.u_cache) # Spectral parameter bounds check if !(σ_min ≤ abs(cache.σ_n) ≤ σ_max) - test_norm = sqrt(sum(abs2, cache.fuprev)) + test_norm = dot(cache.fu, cache.fu) cache.σ_n = clamp(inv(test_norm), T(1), T(1e5)) end @@ -196,7 +191,6 @@ function perform_step!(cache::DFSaneCache{iip}) where {iip} # Update history cache.history[cache.stats.nsteps % M + 1] = f_norm - cache.stats.nf += 1 return nothing end diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index 5ff01d79a..f199b5f29 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -142,7 +142,6 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} @bb copyto!(cache.u_cache, cache.u) @bb copyto!(cache.dfu, cache.fu) - cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 diff --git a/src/klement.jl b/src/klement.jl index cceb51c3c..62aa8f681 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -1,6 +1,6 @@ """ - GeneralKlement(; max_resets = 5, linsolve = nothing, - linesearch = nothing, precs = DEFAULT_PRECS) + GeneralKlement(; max_resets = 5, linsolve = nothing, linesearch = nothing, + precs = DEFAULT_PRECS) An implementation of `Klement` with line search, preconditioning and customizable linear solves. @@ -91,8 +91,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, J, du; kwargs...) - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) + @bb u_cache = copy(u) + @bb fu_cache = copy(fu) @bb J_cache = similar(J) @bb J_cache_2 = similar(J) @bb Jdu = similar(fu) @@ -139,7 +139,6 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} @bb copyto!(cache.u_cache, cache.u) - cache.stats.nf += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 @@ -152,8 +151,8 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} @bb cache.Jdu_cache = cache.J_cache × vec(cache.Jdu) @bb cache.Jdu = cache.J × vec(cache.du) @bb @. cache.fu_cache = (cache.fu - cache.fu_cache - cache.Jdu) / - max(cache.Jdu_cache, eps(real(T))) - @bb cache.J_cache = vec(cache.fu) × transpose(_vec(cache.du)) + ifelse(iszero(cache.Jdu_cache), T(1e-5), cache.Jdu_cache) + @bb cache.J_cache = vec(cache.fu_cache) × transpose(_vec(cache.du)) @bb @. cache.J_cache *= cache.J @bb cache.J_cache_2 = cache.J_cache × cache.J @bb cache.J .+= cache.J_cache_2 diff --git a/src/lbroyden.jl b/src/lbroyden.jl index 611e5511b..34668e5c8 100644 --- a/src/lbroyden.jl +++ b/src/lbroyden.jl @@ -125,7 +125,6 @@ function perform_step!(cache::LimitedMemoryBroydenCache{iip}) where {iip} ApplyArray(*, Vᵀ_part, U_part), cache.du, α) check_and_update!(cache, cache.fu, cache.u, cache.u_cache) - cache.stats.nf += 1 cache.force_stop && return nothing diff --git a/src/levenberg.jl b/src/levenberg.jl index 3b523807c..5806734ae 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -312,7 +312,6 @@ function perform_step!(cache::LevenbergMarquardtCache{true, fastls}) where {fast _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2 @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache f(cache.fu_tmp, u .+ δ, p) - cache.stats.nf += 1 loss = cache.internalnorm(cache.fu_tmp) # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). 
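# The uphill-step test referenced in the comment above is compact enough to state on its
# own. The sketch below is illustrative only and is not part of this patch; it assumes
# plain `Vector` inputs and borrows the cache field names (`v_old`, `b_uphill`, ...)
# purely for readability.
using LinearAlgebra: dot

function accept_uphill_step(loss, loss_old, v, v_old, norm_v, norm_v_old, b_uphill)
    # For the default 2-norm, β is the cosine of the angle between the current and the
    # previous velocity; β close to 1 means the step keeps moving in the same direction,
    # so a bounded increase in the loss is tolerated. With b_uphill = 0 this reduces to
    # the plain `loss ≤ loss_old` test.
    β = dot(v, v_old) / (norm_v * norm_v_old)
    return (1 - β)^b_uphill * loss ≤ loss_old
end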
@@ -411,7 +410,6 @@ function perform_step!(cache::LevenbergMarquardtCache{false, fastls}) where {fas cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2) @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache fu_new = f(u .+ δ, p) - cache.stats.nf += 1 loss = cache.internalnorm(fu_new) # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). diff --git a/src/linesearch.jl b/src/linesearch.jl index a2514e3a7..33de25ae7 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -1,5 +1,5 @@ """ - LineSearch(method = nothing, autodiff = nothing, alpha = true) + LineSearch(; method = nothing, autodiff = nothing, alpha = true) Wrapper over algorithms from [LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl/). Allows automatic diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index b01762493..d4a41015a 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -145,7 +145,6 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} check_and_update!(cache, cache.fu, cache.u, cache.u_cache) @bb copyto!(cache.u_cache, cache.u) - cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 diff --git a/src/raphson.jl b/src/raphson.jl index ac40b7c64..baf2ec10c 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -119,7 +119,6 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} check_and_update!(cache, cache.fu, cache.u, cache.u_cache) @bb copyto!(cache.u_cache, cache.u) - cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 3cd40c907..7e5497ffd 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -376,7 +376,6 @@ function perform_step!(cache::TrustRegionCache{iip}) where {iip} @bb @. cache.u_cache_2 = cache.u + cache.du evaluate_f(cache, cache.u_tmp, cache.p, Val{:fu_cache_2}()) trust_region_step!(cache) - cache.stats.nf += 1 cache.stats.nsolve += 1 cache.stats.nfactors += 1 return nothing diff --git a/src/utils.jl b/src/utils.jl index 0267de434..5bb4e8dbb 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -197,6 +197,7 @@ end function evaluate_f(cache::AbstractNonlinearSolveCache, u, p, fu_sym::Val{FUSYM} = Val(nothing)) where {FUSYM} + cache.stats.nf += 1 if FUSYM === nothing if isinplace(cache) cache.prob.f(get_fu(cache), u, p) diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 53b7b0f7b..8f6519e73 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -12,7 +12,7 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4; @testset "$idx: $(dict["title"])" begin for alg in alg_ops try - sol = solve(nlprob, alg; + sol = solve(nlprob, alg; maxiters = 10000, termination_condition = AbsNormTerminationMode()) problem(res, sol.u, nothing) @@ -23,7 +23,8 @@ function test_on_library(problems, dicts, alg_ops, broken_tests, ϵ = 1e-4; end broken = idx in broken_tests[alg] ? true : false @test norm(res)≤ϵ broken=broken - catch + catch err + @error err broken = idx in broken_tests[alg] ? 
true : false if broken @test false broken=true @@ -83,7 +84,7 @@ end alg_ops = (DFSane(),) broken_tests = Dict(alg => Int[] for alg in alg_ops) - broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 11, 22] + broken_tests[alg_ops[1]] = [1, 2, 3, 5, 6, 21] test_on_library(problems, dicts, alg_ops, broken_tests) end From 954a79965f0ed08da0b4da34d9e5eb7180d1519c Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 30 Nov 2023 23:04:36 -0500 Subject: [PATCH 13/25] Cleanup Normal Form Equation Construction --- src/gaussnewton.jl | 5 +++-- src/jacobian.jl | 31 ++++++++++--------------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index f199b5f29..c885b02f5 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -116,8 +116,8 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} # Use normal form to solve the Linear Problem if cache.JᵀJ !== nothing - __update_JᵀJ!(Val{iip}(), cache, :JᵀJ, cache.J) - __update_Jᵀf!(Val{iip}(), cache, :Jᵀf, :JᵀJ, cache.J, cache.fu) + __update_JᵀJ!(cache, Val(:JᵀJ)) + __update_Jᵀf!(cache, Val(:JᵀJ)) A, b = __maybe_symmetric(cache.JᵀJ), _vec(cache.Jᵀf) else A, b = cache.J, _vec(cache.fu) @@ -148,6 +148,7 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} return nothing end +# FIXME: Reinit `JᵀJ` operator if `p` is changed function __reinit_internal!(cache::GaussNewtonCache; termination_condition = get_termination_mode(cache.tc_cache_1), kwargs...) abstol, reltol, tc_cache_1 = init_termination_cache(cache.abstol, cache.reltol, diff --git a/src/jacobian.jl b/src/jacobian.jl index 83b17f834..03c2492fe 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -209,29 +209,18 @@ function __concrete_vjp_autodiff(vjp_autodiff, jvp_autodiff, uf) end # Generic Handling of Krylov Methods for Normal Form Linear Solves -# FIXME: Use MaybeInplace here for efficient matmuls -function __update_JᵀJ!(iip::Val, cache, sym::Symbol, J) - return __update_JᵀJ!(iip, cache, sym, getproperty(cache, sym), J) +function __update_JᵀJ!(cache::AbstractNonlinearSolveCache) + if !(cache.JᵀJ isa KrylovJᵀJ) + @bb cache.JᵀJ = transpose(cache.J) × cache.J + end end -__update_JᵀJ!(::Val{false}, cache, sym::Symbol, _, J) = setproperty!(cache, sym, J' * J) -__update_JᵀJ!(::Val{true}, cache, sym::Symbol, _, J) = mul!(getproperty(cache, sym), J', J) -__update_JᵀJ!(::Val{false}, cache, sym::Symbol, H::KrylovJᵀJ, J) = H -__update_JᵀJ!(::Val{true}, cache, sym::Symbol, H::KrylovJᵀJ, J) = H -function __update_Jᵀf!(iip::Val, cache, sym1::Symbol, sym2::Symbol, J, fu) - return __update_Jᵀf!(iip, cache, sym1, sym2, getproperty(cache, sym2), J, fu) -end -function __update_Jᵀf!(::Val{false}, cache, sym1::Symbol, sym2::Symbol, _, J, fu) - return setproperty!(cache, sym1, _restructure(getproperty(cache, sym1), J' * fu)) -end -function __update_Jᵀf!(::Val{true}, cache, sym1::Symbol, sym2::Symbol, _, J, fu) - return mul!(_vec(getproperty(cache, sym1)), J', fu) -end -function __update_Jᵀf!(::Val{false}, cache, sym1::Symbol, sym2::Symbol, H::KrylovJᵀJ, J, fu) - return setproperty!(cache, sym1, _restructure(getproperty(cache, sym1), H.Jᵀ * fu)) -end -function __update_Jᵀf!(::Val{true}, cache, sym1::Symbol, sym2::Symbol, H::KrylovJᵀJ, J, fu) - return mul!(_vec(getproperty(cache, sym1)), H.Jᵀ, fu) +function __update_Jᵀf!(cache::AbstractNonlinearSolveCache) + if cache.JᵀJ isa KrylovJᵀJ + @bb cache.Jᵀf = cache.JᵀJ.Jᵀ × cache.fu + else + @bb cache.Jᵀf = transpose(cache.J) × vec(cache.fu) + end end # Left-Right Multiplication From 
51f4a3e30ab5f8d375d24705d4f42cca51d097e7 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sun, 3 Dec 2023 21:38:32 -0500 Subject: [PATCH 14/25] make progress on LM --- src/NonlinearSolve.jl | 2 +- src/gaussnewton.jl | 4 +- src/levenberg.jl | 411 +++++++++++++++++------------------------- src/utils.jl | 26 ++- 4 files changed, 189 insertions(+), 254 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index dacc98910..578343345 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -170,7 +170,7 @@ include("extension_algs.jl") include("linesearch.jl") include("raphson.jl") # include("trustRegion.jl") -# include("levenberg.jl") +include("levenberg.jl") include("gaussnewton.jl") include("dfsane.jl") include("pseudotransient.jl") diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index c885b02f5..94f2e975a 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -152,8 +152,8 @@ end function __reinit_internal!(cache::GaussNewtonCache; termination_condition = get_termination_mode(cache.tc_cache_1), kwargs...) abstol, reltol, tc_cache_1 = init_termination_cache(cache.abstol, cache.reltol, - cache.fu1, cache.u, termination_condition) - _, _, tc_cache_2 = init_termination_cache(cache.abstol, cache.reltol, cache.fu1, + cache.fu, cache.u, termination_condition) + _, _, tc_cache_2 = init_termination_cache(cache.abstol, cache.reltol, cache.fu, cache.u, termination_condition) cache.tc_cache_1 = tc_cache_1 diff --git a/src/levenberg.jl b/src/levenberg.jl index 5806734ae..1ef403895 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -79,18 +79,17 @@ routine for the factorization without constructing `JᵀJ` and `Jᵀf`. For more [this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in `DᵀD` to prevent the damping from being too small. Defaults to `1e-8`. """ -@concrete struct LevenbergMarquardt{CJ, AD, T} <: - AbstractNewtonAlgorithm{CJ, AD} +@concrete struct LevenbergMarquardt{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} ad::AD linsolve precs - damping_initial::T - damping_increase_factor::T - damping_decrease_factor::T - finite_diff_step_geodesic::T - α_geodesic::T - b_uphill::T - min_damping_D::T + damping_initial + damping_increase_factor + damping_decrease_factor + finite_diff_step_geodesic + α_geodesic + b_uphill + min_damping_D end function set_ad(alg::LevenbergMarquardt{CJ}, ad) where {CJ} @@ -100,11 +99,10 @@ function set_ad(alg::LevenbergMarquardt{CJ}, ad) where {CJ} end function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, damping_initial::Real = 1.0, - damping_increase_factor::Real = 2.0, - damping_decrease_factor::Real = 3.0, finite_diff_step_geodesic::Real = 0.1, - α_geodesic::Real = 0.75, b_uphill::Real = 1.0, min_damping_D::AbstractFloat = 1e-8, - adkwargs...) + precs = DEFAULT_PRECS, damping_initial::Real = 1.0, α_geodesic::Real = 0.75, + damping_increase_factor::Real = 2.0, damping_decrease_factor::Real = 3.0, + finite_diff_step_geodesic::Real = 0.1, b_uphill::Real = 1.0, + min_damping_D::Real = 1e-8, adkwargs...) ad = default_adargs_to_adtype(; adkwargs...) 
_concrete_jac = ifelse(concrete_jac === nothing, true, concrete_jac) return LevenbergMarquardt{_unwrap_val(_concrete_jac)}(ad, linsolve, precs, @@ -117,14 +115,25 @@ end f alg u - u_prev - fu1 - fu2 + u_cache + u_cache_2 + fu + fu_cache + fu_cache_2 du + du_cache + J + JᵀJ + Jv + DᵀD + v + v_cache + a + mat_tmp + rhs_tmp p uf linsolve - J jac_cache force_stop::Bool maxiters::Int @@ -133,8 +142,6 @@ end abstol reltol prob - DᵀD - JᵀJ λ λ_factor damping_increase_factor @@ -143,20 +150,9 @@ end α_geodesic b_uphill min_damping_D - v - a - tmp_vec - v_old norm_v_old - δ loss_old make_new_J::Bool - fu_tmp - u_tmp - Jv - mat_tmp - rhs_tmp - J² stats::NLStats tc_cache_1 tc_cache_2 @@ -170,269 +166,186 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, linsolve_kwargs = (;), kwargs...) where {uType, iip, F} alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) - fu1 = evaluate_f(prob, u) - linsolve_with_JᵀJ = Val(__needs_square_A(alg, u0)) + u = __maybe_unaliased(u0, alias_u0) + T = eltype(u) + fu = evaluate_f(prob, u) - if _unwrap_val(linsolve_with_JᵀJ) - uf, linsolve, J, fu2, jac_cache, du, JᵀJ, v = jacobian_caches(alg, f, u, p, - Val(iip); linsolve_kwargs, linsolve_with_JᵀJ) - J² = nothing - else - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); - linsolve_kwargs, linsolve_with_JᵀJ) - JᵀJ = similar(_vec(u)) - J² = similar(J) - v = similar(du) - end + fastls = !__needs_square_A(alg, u0) - λ = convert(eltype(u), alg.damping_initial) - λ_factor = convert(eltype(u), alg.damping_increase_factor) - damping_increase_factor = convert(eltype(u), alg.damping_increase_factor) - damping_decrease_factor = convert(eltype(u), alg.damping_decrease_factor) - h = convert(eltype(u), alg.finite_diff_step_geodesic) - α_geodesic = convert(eltype(u), alg.α_geodesic) - b_uphill = convert(eltype(u), alg.b_uphill) - min_damping_D = convert(eltype(u), alg.min_damping_D) - - if u isa Number - DᵀD = min_damping_D + if !fastls + uf, linsolve, J, fu_cache, jac_cache, du, JᵀJ, v = jacobian_caches(alg, f, u, p, + Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(true)) else - d = similar(u) - d .= min_damping_D - DᵀD = Diagonal(_vec(d)) + uf, linsolve, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, + Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(false)) + @bb JᵀJ = similar(u) + @bb v = similar(du) end - loss = internalnorm(fu1) - a = _mutable_zero(u) - tmp_vec = _mutable_zero(u) - v_old = _mutable_zero(u) - δ = _mutable_zero(u) + λ = T(alg.damping_initial) + λ_factor = T(alg.damping_increase_factor) + damping_increase_factor = T(alg.damping_increase_factor) + damping_decrease_factor = T(alg.damping_decrease_factor) + h = T(alg.finite_diff_step_geodesic) + α_geodesic = T(alg.α_geodesic) + b_uphill = T(alg.b_uphill) + min_damping_D = T(alg.min_damping_D) + + DᵀD = __init_diagonal(u, min_damping_D) + + loss = internalnorm(fu) + + @bb a = similar(du) + @bb v_old = copy(v) + @bb δ = similar(du) + make_new_J = true - fu_tmp = zero(fu1) - abstol, reltol, tc_cache_1 = init_termination_cache(abstol, reltol, fu1, u, + abstol, reltol, tc_cache_1 = init_termination_cache(abstol, reltol, fu, u, termination_condition) if prob isa NonlinearLeastSquaresProblem - _, _, tc_cache_2 = init_termination_cache(abstol, reltol, fu1, u, + _, _, tc_cache_2 = init_termination_cache(abstol, reltol, fu, u, termination_condition) else tc_cache_2 = nothing end - trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) 
+ trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) - if _unwrap_val(linsolve_with_JᵀJ) - mat_tmp = zero(JᵀJ) + if !fastls + @bb mat_tmp = similar(JᵀJ) + @bb mat_tmp .*= T(0) rhs_tmp = nothing else - # Preserve Types mat_tmp = _vcat(J, DᵀD) - fill!(mat_tmp, zero(eltype(u))) - rhs_tmp = vcat(_vec(fu1), _vec(u)) - fill!(rhs_tmp, zero(eltype(u))) - linsolve = linsolve_caches(mat_tmp, rhs_tmp, u, p, alg) + @bb mat_tmp .*= T(0) + rhs_tmp = vcat(_vec(fu), _vec(u)) + @bb rhs_tmp .*= T(0) + linsolve = linsolve_caches(mat_tmp, rhs_tmp, u, p, alg; linsolve_kwargs) end - return LevenbergMarquardtCache{iip, !_unwrap_val(linsolve_with_JᵀJ)}(f, alg, u, copy(u), - fu1, fu2, du, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, - ReturnCode.Default, abstol, reltol, prob, DᵀD, JᵀJ, λ, λ_factor, - damping_increase_factor, damping_decrease_factor, h, α_geodesic, b_uphill, - min_damping_D, v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, fu_tmp, zero(u), - zero(fu1), mat_tmp, rhs_tmp, J², NLStats(1, 0, 0, 0, 0), tc_cache_1, tc_cache_2, - trace) + @bb u_cache = copy(u) + @bb u_cache_2 = similar(u) + @bb fu_cache_2 = similar(fu) + @bb du_cache = similar(du) + Jv = J * v + @bb v_cache = similar(v) + + return LevenbergMarquardtCache{iip, fastls}(f, alg, u, u_cache, u_cache_2, fu, fu_cache, + fu_cache_2, du, du_cache, J, JᵀJ, Jv, DᵀD, v, v_cache, a, mat_tmp, rhs_tmp, p, uf, + linsolve, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, + reltol, prob, λ, λ_factor, damping_increase_factor, damping_decrease_factor, h, + α_geodesic, b_uphill, min_damping_D, internalnorm(v_cache), loss, make_new_J, + NLStats(1, 0, 0, 0, 0), tc_cache_1, tc_cache_2, trace) end -function perform_step!(cache::LevenbergMarquardtCache{true, fastls}) where {fastls} - @unpack fu1, f, make_new_J = cache - - if make_new_J - jacobian!!(cache.J, cache) +function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, fastls} + if cache.make_new_J + cache.J = jacobian!!(cache.J, cache) if fastls - cache.J² .= abs2.(cache.J) - sum!(cache.JᵀJ', cache.J²) - cache.DᵀD.diag .= max.(cache.DᵀD.diag, cache.JᵀJ) + cache.JᵀJ = __sum_JᵀJ!!(cache.JᵀJ, cache.J) + # cache.DᵀD.diag .= max.(cache.DᵀD.diag, cache.JᵀJ) else - __matmul!(cache.JᵀJ, cache.J', cache.J) - cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) + @bb cache.JᵀJ = transpose(cache.J) × cache.J + # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) end cache.make_new_J = false - cache.stats.njacs += 1 end - @unpack u, u_prev, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache + + # @unpack u, u_prev, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache # Usual Levenberg-Marquardt step ("velocity"). # The following lines do: cache.v = -cache.mat_tmp \ cache.u_tmp - if fastls - copyto!(@view(cache.mat_tmp[1:length(fu1), :]), cache.J) - cache.mat_tmp[(length(fu1) + 1):end, :] .= λ .* cache.DᵀD - cache.rhs_tmp[1:length(fu1)] .= _vec(fu1) - linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, - b = cache.rhs_tmp, linu = _vec(cache.du), p = p, reltol = cache.abstol) - _vec(cache.v) .= -_vec(cache.du) - else - mul!(_vec(cache.u_tmp), J', _vec(fu1)) - @. 
cache.mat_tmp = JᵀJ + λ * DᵀD - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), - b = _vec(cache.u_tmp), linu = _vec(cache.du), p = p, reltol = cache.abstol) - cache.linsolve = linres.cache - _vec(cache.v) .= -_vec(cache.du) - end + # if fastls + # copyto!(@view(cache.mat_tmp[1:length(fu1), :]), cache.J) + # cache.mat_tmp[(length(fu1) + 1):end, :] .= λ .* cache.DᵀD + # cache.rhs_tmp[1:length(fu1)] .= _vec(fu1) + # linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, + # b = cache.rhs_tmp, linu = _vec(cache.du), p = p, reltol = cache.abstol) + # _vec(cache.v) .= -_vec(cache.du) + # else + # mul!(_vec(cache.u_tmp), J', _vec(fu1)) + # @. cache.mat_tmp = JᵀJ + λ * DᵀD + # linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), + # b = _vec(cache.u_tmp), linu = _vec(cache.du), p = p, reltol = cache.abstol) + # cache.linsolve = linres.cache + # _vec(cache.v) .= -_vec(cache.du) + # end + + # update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, + # cache.v) + + # # Geodesic acceleration (step_size = v + a / 2). + # @unpack v, α_geodesic, h = cache + # cache.u_tmp .= _restructure(cache.u_tmp, _vec(u) .+ h .* _vec(v)) + # f(cache.fu_tmp, cache.u_tmp, p) + + # # The following lines do: cache.a = -J \ cache.fu_tmp + # # NOTE: Don't pass `A` in again, since we want to reuse the previous solve + # mul!(_vec(cache.Jv), J, _vec(v)) + # @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu1) / h - cache.Jv) + # if fastls + # cache.rhs_tmp[1:length(fu1)] .= _vec(cache.fu_tmp) + # linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.du), + # p = p, reltol = cache.abstol) + # else + # mul!(_vec(cache.u_tmp), J', _vec(cache.fu_tmp)) + # linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_tmp), + # linu = _vec(cache.du), p = p, reltol = cache.abstol) + # cache.linsolve = linres.cache + # @. cache.a = -cache.du + # end - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - cache.v) - - # Geodesic acceleration (step_size = v + a / 2). - @unpack v, α_geodesic, h = cache - cache.u_tmp .= _restructure(cache.u_tmp, _vec(u) .+ h .* _vec(v)) - f(cache.fu_tmp, cache.u_tmp, p) - - # The following lines do: cache.a = -J \ cache.fu_tmp - # NOTE: Don't pass `A` in again, since we want to reuse the previous solve - mul!(_vec(cache.Jv), J, _vec(v)) - @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu1) / h - cache.Jv) - if fastls - cache.rhs_tmp[1:length(fu1)] .= _vec(cache.fu_tmp) - linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.du), - p = p, reltol = cache.abstol) - else - mul!(_vec(cache.u_tmp), J', _vec(cache.fu_tmp)) - linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_tmp), - linu = _vec(cache.du), p = p, reltol = cache.abstol) - cache.linsolve = linres.cache - @. cache.a = -cache.du - end cache.stats.nsolve += 2 cache.stats.nfactors += 2 # Require acceptable steps to satisfy the following condition. norm_v = cache.internalnorm(v) if 2 * cache.internalnorm(cache.a) ≤ α_geodesic * norm_v - _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2 - @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache - f(cache.fu_tmp, u .+ δ, p) - loss = cache.internalnorm(cache.fu_tmp) - - # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). - β = dot(v, v_old) / (norm_v * norm_v_old) - if (1 - β)^b_uphill * loss ≤ loss_old - # Accept step. 
- cache.u .+= δ - check_and_update!(cache.tc_cache_1, cache, cache.fu_tmp, cache.u, cache.u_prev) - if !cache.force_stop && cache.tc_cache_2 !== nothing - # For NLLS Problems - cache.fu1 .= cache.fu_tmp .- cache.fu1 - check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) - end - cache.fu1 .= cache.fu_tmp - _vec(cache.v_old) .= _vec(v) - cache.norm_v_old = norm_v - cache.loss_old = loss - cache.λ_factor = 1 / cache.damping_decrease_factor - cache.make_new_J = true - end + # _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2 + # @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache + # f(cache.fu_tmp, u .+ δ, p) + # loss = cache.internalnorm(cache.fu_tmp) + + # # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). + # β = dot(v, v_old) / (norm_v * norm_v_old) + # if (1 - β)^b_uphill * loss ≤ loss_old + # # Accept step. + # cache.u .+= δ + # check_and_update!(cache.tc_cache_1, cache, cache.fu_tmp, cache.u, cache.u_prev) + # if !cache.force_stop && cache.tc_cache_2 !== nothing + # # For NLLS Problems + # cache.fu1 .= cache.fu_tmp .- cache.fu1 + # check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) + # end + # cache.fu1 .= cache.fu_tmp + # _vec(cache.v_old) .= _vec(v) + # cache.norm_v_old = norm_v + # cache.loss_old = loss + # cache.λ_factor = 1 / cache.damping_decrease_factor + # cache.make_new_J = true + # end end - @. u_prev = u + + @bb copyto!(cache.u_cache, cache.u) cache.λ *= cache.λ_factor cache.λ_factor = cache.damping_increase_factor return nothing end -function perform_step!(cache::LevenbergMarquardtCache{false, fastls}) where {fastls} - @unpack fu1, f, make_new_J = cache - - if make_new_J - cache.J = jacobian!!(cache.J, cache) - if fastls - cache.JᵀJ = _vec(sum(abs2, cache.J; dims = 1)) - cache.DᵀD.diag .= max.(cache.DᵀD.diag, cache.JᵀJ) - else - cache.JᵀJ = cache.J' * cache.J - if cache.JᵀJ isa Number - cache.DᵀD = max(cache.DᵀD, cache.JᵀJ) - else - cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) - end - end - cache.make_new_J = false - cache.stats.njacs += 1 +function __reinit_internal!(cache::LevenbergMarquardtCache; + termination_condition = get_termination_mode(cache.tc_cache_1), kwargs...) + abstol, reltol, tc_cache_1 = init_termination_cache(cache.abstol, cache.reltol, + cache.fu, cache.u, termination_condition) + if cache.tc_cache_2 !== nothing + _, _, tc_cache_2 = init_termination_cache(cache.abstol, cache.reltol, cache.fu, + cache.u, termination_condition) + cache.tc_cache_2 = tc_cache_2 end - @unpack u, u_prev, p, λ, JᵀJ, DᵀD, J, linsolve, alg = cache - - # Usual Levenberg-Marquardt step ("velocity"). - if fastls - cache.mat_tmp = _vcat(J, λ * cache.DᵀD) - cache.rhs_tmp[1:length(fu1)] .= -_vec(fu1) - linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, - b = cache.rhs_tmp, linu = _vec(cache.v), p = p, reltol = cache.abstol) - else - cache.mat_tmp = JᵀJ + λ * DᵀD - if linsolve === nothing - cache.v = -cache.mat_tmp \ (J' * fu1) - else - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), - b = _vec(J' * _vec(fu1)), linu = _vec(cache.v), p, reltol = cache.abstol) - cache.linsolve = linres.cache - cache.v .*= -1 - end - end - - update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - cache.v) - - @unpack v, h, α_geodesic = cache - # Geodesic acceleration (step_size = v + a / 2). 
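# The geodesic-acceleration computation handled in this hunk admits a short out-of-place
# sketch. This is illustrative only, not part of this patch; it assumes a dense Jacobian
# `J`, an out-of-place residual `f(u, p)`, and a diagonal damping matrix `DᵀD`, and
# `geodesic_acceleration` is a hypothetical helper name rather than an API of this package.
using LinearAlgebra

function geodesic_acceleration(f, u, p, fu, J, v, λ, DᵀD, h)
    # Finite-difference estimate of the directional second derivative of f along v.
    d2 = (2 / h) .* ((f(u .+ h .* v, p) .- fu) ./ h .- J * v)
    # Solve the same damped normal equations that produced the velocity v.
    return -(J' * J + λ * DᵀD) \ (J' * d2)
end

# The combined step v + a / 2 is only accepted when 2 * norm(a) ≤ α_geodesic * norm(v).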
- rhs_term = _vec(((2 / h) .* ((_vec(f(u .+ h .* _restructure(u, v), p)) .- - _vec(fu1)) ./ h .- J * _vec(v)))) - if fastls - cache.rhs_tmp[1:length(fu1)] .= -_vec(rhs_term) - linres = dolinsolve(alg.precs, linsolve; - b = cache.rhs_tmp, linu = _vec(cache.a), p = p, reltol = cache.abstol) - else - if linsolve === nothing - cache.a = -cache.mat_tmp \ _vec(J' * rhs_term) - else - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), - b = _mutable(_vec(J' * rhs_term)), linu = _vec(cache.a), p, - reltol = cache.abstol, reuse_A_if_factorization = true) - cache.linsolve = linres.cache - cache.a .*= -1 - end - end - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - - # Require acceptable steps to satisfy the following condition. - norm_v = cache.internalnorm(v) - if 2 * cache.internalnorm(cache.a) ≤ α_geodesic * norm_v - cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2) - @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache - fu_new = f(u .+ δ, p) - loss = cache.internalnorm(fu_new) - - # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). - β = dot(v, v_old) / (norm_v * norm_v_old) - if (1 - β)^b_uphill * loss ≤ loss_old - # Accept step. - cache.u += δ - check_and_update!(cache.tc_cache_1, cache, fu_new, cache.u, cache.u_prev) - if !cache.force_stop && cache.tc_cache_2 !== nothing - # For NLLS Problems - cache.fu1 = fu_new .- cache.fu1 - check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) - end - cache.fu1 = fu_new - cache.v_old = _restructure(cache.v_old, v) - cache.norm_v_old = norm_v - cache.loss_old = loss - cache.λ_factor = 1 / cache.damping_decrease_factor - cache.make_new_J = true - end - end - cache.u_prev = @. cache.u - cache.λ *= cache.λ_factor - cache.λ_factor = cache.damping_increase_factor + cache.tc_cache_1 = tc_cache_1 + cache.abstol = abstol + cache.reltol = reltol return nothing end diff --git a/src/utils.jl b/src/utils.jl index 5bb4e8dbb..a6b95c9ef 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -89,8 +89,8 @@ end DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing function dolinsolve(precs::P, linsolve::FakeLinearSolveJLCache; A = nothing, - linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, - cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} + linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, + cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} A !== nothing && (linsolve.A = A) b !== nothing && (linsolve.b = b) linres = linsolve.A \ linsolve.b @@ -425,3 +425,25 @@ end return w end @inline __init_ones(x::StaticArray) = ones(typeof(x)) + +# Diagonal of type `u` +__init_diagonal(u::Number, v) = oftype(u, v) +function __init_diagonal(u::SArray, v) + u_ = vec(u) + return Diagonal(ones(typeof(u_)) * v) +end +function __init_diagonal(u, v) + d = similar(vec(u)) + d .= v + return Diagonal(d) +end + +# Reduce sum +function __sum_JᵀJ!!(y, J) + if setindex_trait(y) === CanSetindex() + sum!(abs2, y, J') + return y + else + return sum(abs2, J'; dims = 1) + end +end From 28e39dd025bfd60fca2f571f2a32248ba1f68563 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 00:37:50 -0500 Subject: [PATCH 15/25] Kind of finish LM --- src/levenberg.jl | 151 +++++++++++++++++++++++++---------------------- src/utils.jl | 18 ++++++ 2 files changed, 100 insertions(+), 69 deletions(-) diff --git a/src/levenberg.jl b/src/levenberg.jl index 1ef403895..1836bceaa 
100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -179,7 +179,8 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, else uf, linsolve, J, fu_cache, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(false)) - @bb JᵀJ = similar(u) + u_ = _vec(u) + @bb JᵀJ = similar(u_) @bb v = similar(du) end @@ -241,91 +242,103 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, end function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, fastls} + @unpack alg, linsolve = cache + if cache.make_new_J cache.J = jacobian!!(cache.J, cache) if fastls cache.JᵀJ = __sum_JᵀJ!!(cache.JᵀJ, cache.J) - # cache.DᵀD.diag .= max.(cache.DᵀD.diag, cache.JᵀJ) else @bb cache.JᵀJ = transpose(cache.J) × cache.J - # cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) end + cache.DᵀD = __update_LM_diagonal!!(cache.DᵀD, cache.JᵀJ) cache.make_new_J = false end - # @unpack u, u_prev, p, λ, JᵀJ, DᵀD, J, alg, linsolve = cache - # Usual Levenberg-Marquardt step ("velocity"). # The following lines do: cache.v = -cache.mat_tmp \ cache.u_tmp - # if fastls - # copyto!(@view(cache.mat_tmp[1:length(fu1), :]), cache.J) - # cache.mat_tmp[(length(fu1) + 1):end, :] .= λ .* cache.DᵀD - # cache.rhs_tmp[1:length(fu1)] .= _vec(fu1) - # linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, - # b = cache.rhs_tmp, linu = _vec(cache.du), p = p, reltol = cache.abstol) - # _vec(cache.v) .= -_vec(cache.du) - # else - # mul!(_vec(cache.u_tmp), J', _vec(fu1)) - # @. cache.mat_tmp = JᵀJ + λ * DᵀD - # linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), - # b = _vec(cache.u_tmp), linu = _vec(cache.du), p = p, reltol = cache.abstol) - # cache.linsolve = linres.cache - # _vec(cache.v) .= -_vec(cache.du) - # end - - # update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, - # cache.v) - - # # Geodesic acceleration (step_size = v + a / 2). - # @unpack v, α_geodesic, h = cache - # cache.u_tmp .= _restructure(cache.u_tmp, _vec(u) .+ h .* _vec(v)) - # f(cache.fu_tmp, cache.u_tmp, p) - - # # The following lines do: cache.a = -J \ cache.fu_tmp - # # NOTE: Don't pass `A` in again, since we want to reuse the previous solve - # mul!(_vec(cache.Jv), J, _vec(v)) - # @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu1) / h - cache.Jv) - # if fastls - # cache.rhs_tmp[1:length(fu1)] .= _vec(cache.fu_tmp) - # linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.du), - # p = p, reltol = cache.abstol) - # else - # mul!(_vec(cache.u_tmp), J', _vec(cache.fu_tmp)) - # linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_tmp), - # linu = _vec(cache.du), p = p, reltol = cache.abstol) - # cache.linsolve = linres.cache - # @. cache.a = -cache.du - # end + if fastls + if setindex_trait(cache.mat_tmp) === CanSetindex() + copyto!(@view(cache.mat_tmp[1:length(cache.fu), :]), cache.J) + cache.mat_tmp[(length(cache.fu) + 1):end, :] .= cache.λ .* cache.DᵀD + else + cache.mat_tmp = _vcat(cache.J, cache.λ .* cache.DᵀD) + end + if setindex_trait(cache.rhs_tmp) === CanSetindex() + cache.rhs_tmp[1:length(cache.fu)] .= _vec(cache.fu) + else + cache.rhs_tmp = _vcat(_vec(cache.fu), zero(_vec(cache.u))) + end + linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, + b = cache.rhs_tmp, linu = _vec(cache.v), cache.p, reltol = cache.abstol) + @bb @. cache.v = -linres.u + else + @bb cache.u_cache_2 = transpose(J) × cache.fu + @bb @. 
cache.mat_tmp = cache.JᵀJ + cache.λ * cache.DᵀD + linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, + b = _vec(cache.u_cache_2), linu = _vec(cache.v), cache.p, reltol = cache.abstol) + cache.linsolve = linres.cache + @bb @. cache.v = -linres.u + end + + update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, + cache.v) + + # Geodesic acceleration (step_size = v + a / 2). + @bb @. cache.u_cache_2 = cache.u + cache.h * cache.v + evaluate_f(cache, cache.u_cache_2, cache.p, Val(:fu_cache_2)) + + # The following lines do: cache.a = -J \ cache.fu_tmp + # NOTE: Don't pass `A` in again, since we want to reuse the previous solve + @bb cache.Jv = cache.J × cache.v + @bb @. cache.fu_cache_2 = (2 / cache.h) * + ((cache.fu_cache_2 - cache.fu) / cache.h - cache.Jv) + if fastls + if setindex_trait(cache.rhs_tmp) === CanSetindex() + cache.rhs_tmp[1:length(cache.fu)] .= _vec(cache.fu_cache_2) + else + cache.rhs_tmp = _vcat(_vec(cache.fu_cache_2), zero(_vec(cache.u))) + end + linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.a), + cache.p, reltol = cache.abstol) + @bb @. cache.a = -linres.u + else + @bb cache.u_cache_2 = transpose(J) × cache.fu_cache_2 + linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_cache_2), + linu = _vec(cache.a), cache.p, reltol = cache.abstol) + cache.linsolve = linres.cache + @bb @. cache.a = -linres.du + end cache.stats.nsolve += 2 cache.stats.nfactors += 2 # Require acceptable steps to satisfy the following condition. - norm_v = cache.internalnorm(v) - if 2 * cache.internalnorm(cache.a) ≤ α_geodesic * norm_v - # _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2 - # @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache - # f(cache.fu_tmp, u .+ δ, p) - # loss = cache.internalnorm(cache.fu_tmp) - - # # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). - # β = dot(v, v_old) / (norm_v * norm_v_old) - # if (1 - β)^b_uphill * loss ≤ loss_old - # # Accept step. - # cache.u .+= δ - # check_and_update!(cache.tc_cache_1, cache, cache.fu_tmp, cache.u, cache.u_prev) - # if !cache.force_stop && cache.tc_cache_2 !== nothing - # # For NLLS Problems - # cache.fu1 .= cache.fu_tmp .- cache.fu1 - # check_and_update!(cache.tc_cache_2, cache, cache.fu1, cache.u, cache.u_prev) - # end - # cache.fu1 .= cache.fu_tmp - # _vec(cache.v_old) .= _vec(v) - # cache.norm_v_old = norm_v - # cache.loss_old = loss - # cache.λ_factor = 1 / cache.damping_decrease_factor - # cache.make_new_J = true - # end + norm_v = cache.internalnorm(cache.v) + if 2 * cache.internalnorm(cache.a) ≤ cache.α_geodesic * norm_v + @bb @. cache.du_cache = cache.v + cache.a / 2 + @bb @. cache.u_cache_2 = cache.u + cache.du_cache + evaluate_f(cache, cache.u_cache_2, cache.p, Val(:fu_cache_2)) + loss = cache.internalnorm(cache.fu_cache_2) + + # Condition to accept uphill steps (evaluates to `loss ≤ loss_old` in iteration 1). + β = dot(cache.v, cache.v_cache) / (norm_v * cache.norm_v_old) + if (1 - β)^cache.b_uphill * loss ≤ cache.loss_old + # Accept step. + @bb copyto!(cache.u, cache.u_cache_2) + check_and_update!(cache.tc_cache_1, cache, cache.fu_cache, cache.u, + cache.u_cache) + if !cache.force_stop && cache.tc_cache_2 !== nothing # For NLLS Problems + @bb @. 
cache.fu = cache.fu_cache_2 - cache.fu + check_and_update!(cache.tc_cache_2, cache, cache.fu, cache.u, cache.u_cache) + end + @bb copyto!(cache.fu_cache, cache.fu_cache_2) + @bb copyto!(cache.v_cache, cache.v) + cache.norm_v_old = norm_v + cache.loss_old = loss + cache.λ_factor = 1 / cache.damping_decrease_factor + cache.make_new_J = true + end end @bb copyto!(cache.u_cache, cache.u) diff --git a/src/utils.jl b/src/utils.jl index a6b95c9ef..787d697d6 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -447,3 +447,21 @@ function __sum_JᵀJ!!(y, J) return sum(abs2, J'; dims = 1) end end + +function __update_LM_diagonal!!(y::Diagonal, x::AbstractVector) + if setindex_trait(y.diag) === CanSetindex() + @. y.diag = max(y.diag, x) + return y + else + return Diagonal(max.(y.diag, x)) + end +end +@views function __update_LM_diagonal!!(y::Diagonal, x::AbstractMatrix) + x_diag = x[diagind(x)] + if setindex_trait(y.diag) === CanSetindex() + @. y.diag = max(y.diag, x_diag) + return y + else + return Diagonal(max.(y.diag, x_diag)) + end +end From c8f728326c2444838c5a6702914ae8a82724fd3d Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 12:55:25 -0500 Subject: [PATCH 16/25] Patch tracing and LM --- src/levenberg.jl | 6 +++--- src/trace.jl | 8 ++++---- src/utils.jl | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/levenberg.jl b/src/levenberg.jl index 1836bceaa..f47c36347 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -273,7 +273,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, b = cache.rhs_tmp, linu = _vec(cache.v), cache.p, reltol = cache.abstol) @bb @. cache.v = -linres.u else - @bb cache.u_cache_2 = transpose(J) × cache.fu + @bb cache.u_cache_2 = transpose(cache.J) × cache.fu @bb @. cache.mat_tmp = cache.JᵀJ + cache.λ * cache.DᵀD linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, b = _vec(cache.u_cache_2), linu = _vec(cache.v), cache.p, reltol = cache.abstol) @@ -288,7 +288,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, @bb @. cache.u_cache_2 = cache.u + cache.h * cache.v evaluate_f(cache, cache.u_cache_2, cache.p, Val(:fu_cache_2)) - # The following lines do: cache.a = -J \ cache.fu_tmp + # The following lines do: cache.a = -cache.mat_tmp \ cache.fu_tmp # NOTE: Don't pass `A` in again, since we want to reuse the previous solve @bb cache.Jv = cache.J × cache.v @bb @. cache.fu_cache_2 = (2 / cache.h) * @@ -332,7 +332,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, @bb @. 
cache.fu = cache.fu_cache_2 - cache.fu check_and_update!(cache.tc_cache_2, cache, cache.fu, cache.u, cache.u_cache) end - @bb copyto!(cache.fu_cache, cache.fu_cache_2) + @bb copyto!(cache.fu, cache.fu_cache_2) @bb copyto!(cache.v_cache, cache.v) cache.norm_v_old = norm_v cache.loss_old = loss diff --git a/src/trace.jl b/src/trace.jl index 39c01d2c7..9e042f0bc 100644 --- a/src/trace.jl +++ b/src/trace.jl @@ -209,8 +209,8 @@ function update_trace!(trace::NonlinearSolveTrace{ShT, StT}, iter, u, fu, J, δu return trace end - show_now = ShT && (iter % trace.trace_level.print_frequency == 1) - store_now = StT && (iter % trace.trace_level.store_frequency == 1) + show_now = ShT && (mod1(iter, trace.trace_level.print_frequency) == 1) + store_now = StT && (mod1(iter, trace.trace_level.store_frequency) == 1) (show_now || store_now) && (entry = __trace_entry(trace.trace_level, iter, u, fu, J, δu, α)) store_now && push!(trace.history, entry) @@ -230,8 +230,8 @@ function update_trace_with_invJ!(trace::NonlinearSolveTrace{ShT, StT}, iter, u, return trace end - show_now = ShT && (iter % trace.trace_level.print_frequency == 1) - store_now = StT && (iter % trace.trace_level.store_frequency == 1) + show_now = ShT && (mod1(iter, trace.trace_level.print_frequency) == 1) + store_now = StT && (mod1(iter, trace.trace_level.store_frequency) == 1) if show_now || store_now J_ = trace.trace_level isa TraceMinimal ? J : inv(J) entry = __trace_entry(trace.trace_level, iter, u, fu, J_, δu, α) diff --git a/src/utils.jl b/src/utils.jl index 787d697d6..0b64ea839 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -448,7 +448,8 @@ function __sum_JᵀJ!!(y, J) end end -function __update_LM_diagonal!!(y::Diagonal, x::AbstractVector) +@inline __update_LM_diagonal!!(y::Number, x::Number) = max(y, x) +@inline function __update_LM_diagonal!!(y::Diagonal, x::AbstractVector) if setindex_trait(y.diag) === CanSetindex() @. y.diag = max(y.diag, x) return y @@ -456,7 +457,7 @@ function __update_LM_diagonal!!(y::Diagonal, x::AbstractVector) return Diagonal(max.(y.diag, x)) end end -@views function __update_LM_diagonal!!(y::Diagonal, x::AbstractMatrix) +@inline @views function __update_LM_diagonal!!(y::Diagonal, x::AbstractMatrix) x_diag = x[diagind(x)] if setindex_trait(y.diag) === CanSetindex() @. y.diag = max(y.diag, x_diag) From 13e590e45991b96cd12a945d3488e3890493c4fc Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 14:00:56 -0500 Subject: [PATCH 17/25] LM Fixed --- src/jacobian.jl | 6 +++--- src/levenberg.jl | 31 +++++++++++++------------------ src/pseudotransient.jl | 2 +- src/utils.jl | 18 +++++++++++++----- test/23_test_problems.jl | 4 ++-- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/jacobian.jl b/src/jacobian.jl index 03c2492fe..2e539fcd8 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -138,13 +138,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u::Number, kwargs...) 
where {needsJᵀJ, F} # NOTE: Scalar `u` assumes scalar output from `f` uf = SciMLBase.JacobianWrapper{false}(f, p) - needsJᵀJ && return uf, nothing, u, nothing, nothing, u, u, u - return uf, FakeLinearSolveJLCache(u, u), u, nothing, nothing, u + return uf, FakeLinearSolveJLCache(u, u), u, nothing, nothing, u, u, u end # Linear Solve Cache function linsolve_caches(A, b, u, p, alg; linsolve_kwargs = (;)) - if alg.linsolve === nothing && A isa SMatrix && linsolve_kwargs === (;) + if A isa Number || + (alg.linsolve === nothing && A isa SMatrix && linsolve_kwargs === (;)) # Default handling for SArrays in LinearSolve is not great. Some parts are patched # but there are quite a few unnecessary allocations return FakeLinearSolveJLCache(A, b) diff --git a/src/levenberg.jl b/src/levenberg.jl index f47c36347..9463a7c34 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -120,8 +120,6 @@ end fu fu_cache fu_cache_2 - du - du_cache J JᵀJ Jv @@ -197,9 +195,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, loss = internalnorm(fu) - @bb a = similar(du) - @bb v_old = copy(v) - @bb δ = similar(du) + a = du # `du` is not used anywhere, use it to store `a` make_new_J = true @@ -215,8 +211,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) if !fastls - @bb mat_tmp = similar(JᵀJ) - @bb mat_tmp .*= T(0) + @bb mat_tmp = zero(JᵀJ) rhs_tmp = nothing else mat_tmp = _vcat(J, DᵀD) @@ -229,15 +224,14 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, @bb u_cache = copy(u) @bb u_cache_2 = similar(u) @bb fu_cache_2 = similar(fu) - @bb du_cache = similar(du) Jv = J * v - @bb v_cache = similar(v) + @bb v_cache = zero(v) return LevenbergMarquardtCache{iip, fastls}(f, alg, u, u_cache, u_cache_2, fu, fu_cache, - fu_cache_2, du, du_cache, J, JᵀJ, Jv, DᵀD, v, v_cache, a, mat_tmp, rhs_tmp, p, uf, + fu_cache_2, J, JᵀJ, Jv, DᵀD, v, v_cache, a, mat_tmp, rhs_tmp, p, uf, linsolve, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, λ, λ_factor, damping_increase_factor, damping_decrease_factor, h, - α_geodesic, b_uphill, min_damping_D, internalnorm(v_cache), loss, make_new_J, + α_geodesic, b_uphill, min_damping_D, loss, loss, make_new_J, NLStats(1, 0, 0, 0, 0), tc_cache_1, tc_cache_2, trace) end @@ -271,11 +265,12 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, end linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, b = cache.rhs_tmp, linu = _vec(cache.v), cache.p, reltol = cache.abstol) + cache.linsolve = linres.cache @bb @. cache.v = -linres.u else @bb cache.u_cache_2 = transpose(cache.J) × cache.fu @bb @. cache.mat_tmp = cache.JᵀJ + cache.λ * cache.DᵀD - linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, + linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), b = _vec(cache.u_cache_2), linu = _vec(cache.v), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache @bb @. cache.v = -linres.u @@ -289,7 +284,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, evaluate_f(cache, cache.u_cache_2, cache.p, Val(:fu_cache_2)) # The following lines do: cache.a = -cache.mat_tmp \ cache.fu_tmp - # NOTE: Don't pass `A` in again, since we want to reuse the previous solve + # NOTE: Don't pass `A`` in again, since we want to reuse the previous solve @bb cache.Jv = cache.J × cache.v @bb @. 
cache.fu_cache_2 = (2 / cache.h) * ((cache.fu_cache_2 - cache.fu) / cache.h - cache.Jv) @@ -301,13 +296,14 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, end linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.a), cache.p, reltol = cache.abstol) + cache.linsolve = linres.cache @bb @. cache.a = -linres.u else - @bb cache.u_cache_2 = transpose(J) × cache.fu_cache_2 + @bb cache.u_cache_2 = transpose(cache.J) × cache.fu_cache_2 linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_cache_2), linu = _vec(cache.a), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache - @bb @. cache.a = -linres.du + @bb @. cache.a = -linres.u end cache.stats.nsolve += 2 @@ -316,8 +312,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, # Require acceptable steps to satisfy the following condition. norm_v = cache.internalnorm(cache.v) if 2 * cache.internalnorm(cache.a) ≤ cache.α_geodesic * norm_v - @bb @. cache.du_cache = cache.v + cache.a / 2 - @bb @. cache.u_cache_2 = cache.u + cache.du_cache + @bb @. cache.u_cache_2 = cache.u + cache.v + cache.a / 2 evaluate_f(cache, cache.u_cache_2, cache.p, Val(:fu_cache_2)) loss = cache.internalnorm(cache.fu_cache_2) @@ -326,7 +321,7 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, if (1 - β)^cache.b_uphill * loss ≤ cache.loss_old # Accept step. @bb copyto!(cache.u, cache.u_cache_2) - check_and_update!(cache.tc_cache_1, cache, cache.fu_cache, cache.u, + check_and_update!(cache.tc_cache_1, cache, cache.fu_cache_2, cache.u, cache.u_cache) if !cache.force_stop && cache.tc_cache_2 !== nothing # For NLLS Problems @bb @. cache.fu = cache.fu_cache_2 - cache.fu diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index d4a41015a..dfaf80180 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -112,12 +112,12 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} if cache.J isa SciMLOperators.AbstractSciMLOperator A = cache.J - inv_α * I elseif setindex_trait(cache.J) === CanSetindex() - idxs = diagind(cache.J) if fast_scalar_indexing(cache.J) @inbounds for i in axes(cache.J, 1) cache.J[i, i] = cache.J[i, i] - inv_α end else + idxs = diagind(cache.J) @.. broadcast=false @view(cache.J[idxs])=@view(cache.J[idxs]) - inv_α end A = cache.J diff --git a/src/utils.jl b/src/utils.jl index 0b64ea839..e19771ef7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -457,12 +457,20 @@ end return Diagonal(max.(y.diag, x)) end end -@inline @views function __update_LM_diagonal!!(y::Diagonal, x::AbstractMatrix) - x_diag = x[diagind(x)] +@inline function __update_LM_diagonal!!(y::Diagonal, x::AbstractMatrix) if setindex_trait(y.diag) === CanSetindex() - @. y.diag = max(y.diag, x_diag) - return y + if fast_scalar_indexing(y.diag) + @inbounds for i in axes(x, 1) + y.diag[i] = max(y.diag[i], x[i, i]) + end + return y + else + idxs = diagind(x) + @.. broadcast=false y.diag=max(y.diag, @view(x[idxs])) + return y + end else - return Diagonal(max.(y.diag, x_diag)) + idxs = diagind(x) + return Diagonal(@.. 
broadcast=false max(y.diag, @view(x[idxs]))) end end diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 8f6519e73..741402057 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -73,8 +73,8 @@ end # dictionary with indices of test problems where method does not converge to small residual broken_tests = Dict(alg => Int[] for alg in alg_ops) - broken_tests[alg_ops[1]] = [3, 6, 17, 21] - broken_tests[alg_ops[2]] = [3, 6, 17, 21] + broken_tests[alg_ops[1]] = [3, 6, 11, 17, 21] + broken_tests[alg_ops[2]] = [3, 6, 11, 17, 21] broken_tests[alg_ops[3]] = [6, 11, 17, 21] test_on_library(problems, dicts, alg_ops, broken_tests) From 445e97b92bfc37d5b3d3443fec63b3239dee2269 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 20:30:13 -0500 Subject: [PATCH 18/25] Trust Region mostly works --- src/NonlinearSolve.jl | 98 +++++----- src/gaussnewton.jl | 4 +- src/jacobian.jl | 46 +++-- src/trustRegion.jl | 433 ++++++++++++++++-------------------------- src/utils.jl | 8 - 5 files changed, 245 insertions(+), 344 deletions(-) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 578343345..278667790 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -169,7 +169,7 @@ include("trace.jl") include("extension_algs.jl") include("linesearch.jl") include("raphson.jl") -# include("trustRegion.jl") +include("trustRegion.jl") include("levenberg.jl") include("gaussnewton.jl") include("dfsane.jl") @@ -179,54 +179,54 @@ include("klement.jl") include("lbroyden.jl") include("jacobian.jl") include("ad.jl") -# include("default.jl") - -# @setup_workload begin -# nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), -# (NonlinearFunction{false}((u, p) -> u .* u .- p), [0.1]), -# (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) -# probs_nls = NonlinearProblem[] -# for T in (Float32, Float64), (fn, u0) in nlfuncs -# push!(probs_nls, NonlinearProblem(fn, T.(u0), T(2))) -# end - -# nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), -# GeneralBroyden(), GeneralKlement(), DFSane(), nothing) - -# probs_nlls = NonlinearLeastSquaresProblem[] -# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), -# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), -# (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, -# resid_prototype = zeros(1)), [0.1, 0.0]), -# (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), -# resid_prototype = zeros(4)), [0.1, 0.1])) -# for (fn, u0) in nlfuncs -# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) -# end -# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), Float32[0.1, 0.0]), -# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), -# Float32[0.1, 0.1]), -# (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, -# resid_prototype = zeros(Float32, 1)), Float32[0.1, 0.0]), -# (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), -# resid_prototype = zeros(Float32, 4)), Float32[0.1, 0.1])) -# for (fn, u0) in nlfuncs -# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0f0)) -# end - -# nlls_algs = (LevenbergMarquardt(), GaussNewton(), -# LevenbergMarquardt(; linsolve = LUFactorization()), -# GaussNewton(; linsolve = LUFactorization())) - -# @compile_workload begin -# for prob in probs_nls, alg in nls_algs -# solve(prob, alg, abstol = 1e-2) -# end -# for prob in probs_nlls, alg in nlls_algs -# 
solve(prob, alg, abstol = 1e-2) -# end -# end -# end +include("default.jl") + +@setup_workload begin + nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), + (NonlinearFunction{false}((u, p) -> u .* u .- p), [0.1]), + (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) + probs_nls = NonlinearProblem[] + for T in (Float32, Float64), (fn, u0) in nlfuncs + push!(probs_nls, NonlinearProblem(fn, T.(u0), T(2))) + end + + nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), + GeneralBroyden(), GeneralKlement(), DFSane(), nothing) + + probs_nlls = NonlinearLeastSquaresProblem[] + nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), + (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), + (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, + resid_prototype = zeros(1)), [0.1, 0.0]), + (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), + resid_prototype = zeros(4)), [0.1, 0.1])) + for (fn, u0) in nlfuncs + push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) + end + nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), Float32[0.1, 0.0]), + (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), + Float32[0.1, 0.1]), + (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p, + resid_prototype = zeros(Float32, 1)), Float32[0.1, 0.0]), + (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), + resid_prototype = zeros(Float32, 4)), Float32[0.1, 0.1])) + for (fn, u0) in nlfuncs + push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0f0)) + end + + nlls_algs = (LevenbergMarquardt(), GaussNewton(), + LevenbergMarquardt(; linsolve = LUFactorization()), + GaussNewton(; linsolve = LUFactorization())) + + @compile_workload begin + for prob in probs_nls, alg in nls_algs + solve(prob, alg, abstol = 1e-2) + end + for prob in probs_nlls, alg in nlls_algs + solve(prob, alg, abstol = 1e-2) + end + end +end export RadiusUpdateSchemes diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index 94f2e975a..9a227a7fa 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -116,8 +116,8 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} # Use normal form to solve the Linear Problem if cache.JᵀJ !== nothing - __update_JᵀJ!(cache, Val(:JᵀJ)) - __update_Jᵀf!(cache, Val(:JᵀJ)) + __update_JᵀJ!(cache) + __update_Jᵀf!(cache) A, b = __maybe_symmetric(cache.JᵀJ), _vec(cache.Jᵀf) else A, b = cache.J, _vec(cache.fu) diff --git a/src/jacobian.jl b/src/jacobian.jl index 2e539fcd8..cd84b5d1d 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -138,7 +138,7 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u::Number, kwargs...) where {needsJᵀJ, F} # NOTE: Scalar `u` assumes scalar output from `f` uf = SciMLBase.JacobianWrapper{false}(f, p) - return uf, FakeLinearSolveJLCache(u, u), u, nothing, nothing, u, u, u + return uf, FakeLinearSolveJLCache(u, u), u, zero(u), nothing, u, u, u end # Linear Solve Cache @@ -208,27 +208,49 @@ function __concrete_vjp_autodiff(vjp_autodiff, jvp_autodiff, uf) end end +# jvp fallback scalar +__jacvec(args...; kwargs...) = JacVec(args...; kwargs...) +function __jacvec(uf, u::Number; autodiff, kwargs...) + @assert autodiff isa AutoForwardDiff "Only ForwardDiff is currently supported." 
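# A standalone sketch of the dual-number JVP that `JVPScalar` (defined below) performs for
# scalar `u`: evaluate `f` once at a Dual carrying `v` as the partial and read off f'(u)*v.
# The helper name `scalar_jvp_sketch` is illustrative only, not part of the package.
import ForwardDiff
function scalar_jvp_sketch(f::F, u::Number, v::Number) where {F}
    T = typeof(ForwardDiff.Tag(typeof(f), typeof(u)))
    y = f(ForwardDiff.Dual{T}(u, v))
    return ForwardDiff.extract_derivative(T, y)  # == f'(u) * v
end
# scalar_jvp_sketch(sin, 1.0, 2.0) ≈ 2 * cos(1.0)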
+ return JVPScalar(uf, u, autodiff) +end + +@concrete mutable struct JVPScalar + uf + u + autodiff +end + +function Base.:*(jvp::JVPScalar, v) + T = typeof(ForwardDiff.Tag(typeof(jvp.uf), typeof(jvp.u))) + out = jvp.uf(ForwardDiff.Dual{T}(jvp.u, v)) + return ForwardDiff.extract_derivative(T, out) +end + # Generic Handling of Krylov Methods for Normal Form Linear Solves -function __update_JᵀJ!(cache::AbstractNonlinearSolveCache) +function __update_JᵀJ!(cache::AbstractNonlinearSolveCache, J = nothing) if !(cache.JᵀJ isa KrylovJᵀJ) - @bb cache.JᵀJ = transpose(cache.J) × cache.J + J_ = ifelse(J === nothing, cache.J, J) + @bb cache.JᵀJ = transpose(J_) × J_ end end -function __update_Jᵀf!(cache::AbstractNonlinearSolveCache) +function __update_Jᵀf!(cache::AbstractNonlinearSolveCache, J = nothing) if cache.JᵀJ isa KrylovJᵀJ @bb cache.Jᵀf = cache.JᵀJ.Jᵀ × cache.fu else - @bb cache.Jᵀf = transpose(cache.J) × vec(cache.fu) + J_ = ifelse(J === nothing, cache.J, J) + @bb cache.Jᵀf = transpose(J_) × vec(cache.fu) end end # Left-Right Multiplication -__lr_mul(::Val, H, g) = dot(g, H, g) -## TODO: Use a cache here to avoid allocations -__lr_mul(::Val{false}, H::KrylovJᵀJ, g) = dot(g, H.JᵀJ, g) -function __lr_mul(::Val{true}, H::KrylovJᵀJ, g) - c = similar(g) - mul!(c, H.JᵀJ, g) - return dot(g, c) +__lr_mul(cache::AbstractNonlinearSolveCache) = __lr_mul(cache, cache.JᵀJ, cache.Jᵀf) +function __lr_mul(cache::AbstractNonlinearSolveCache, JᵀJ::KrylovJᵀJ, Jᵀf) + @bb cache.lr_mul_cache = JᵀJ.JᵀJ × vec(Jᵀf) + return dot(_vec(Jᵀf), _vec(cache.lr_mul_cache)) +end +function __lr_mul(cache::AbstractNonlinearSolveCache, JᵀJ, Jᵀf) + @bb cache.lr_mul_cache = JᵀJ × Jᵀf + return dot(_vec(Jᵀf), _vec(cache.lr_mul_cache)) end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 7e5497ffd..f27259d3f 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -182,19 +182,26 @@ function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAU expand_threshold, shrink_factor, expand_factor, max_shrink_times, vjp_autodiff) end -@concrete mutable struct TrustRegionCache{iip, trustType, floatType} <: - AbstractNonlinearSolveCache{iip} +@concrete mutable struct TrustRegionCache{iip} <: AbstractNonlinearSolveCache{iip} f alg - u_prev u - fu_prev + u_cache + u_cache_2 + u_gauss_newton + u_cauchy fu - fu2 + fu_cache + fu_cache_2 + J + J_cache + JᵀJ + Jᵀf p uf + du + lr_mul_cache linsolve - J jac_cache force_stop::Bool maxiters::Int @@ -204,60 +211,55 @@ end reltol prob radius_update_scheme::RadiusUpdateSchemes.T - trust_r::trustType - max_trust_r::trustType + trust_r + max_trust_r step_threshold - shrink_threshold::trustType - expand_threshold::trustType - shrink_factor::trustType - expand_factor::trustType - loss::floatType - loss_new::floatType - H - g + shrink_threshold + expand_threshold + shrink_factor + expand_factor + loss + loss_new shrink_counter::Int - du - u_tmp - u_gauss_newton - u_cauchy - fu_new make_new_J::Bool - r::floatType - p1::floatType - p2::floatType - p3::floatType - p4::floatType - ϵ::floatType + r + p1 + p2 + p3 + p4 + ϵ + jvp_operator # For Yuan stats::NLStats tc_cache trace end -# TODO: add J_cache function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, - termination_condition = nothing, internalnorm = DEFAULT_NORM, linsolve_kwargs = (;), - kwargs...) where {uType, iip} + termination_condition = nothing, internalnorm = Base.Fix2(norm, 2), + linsolve_kwargs = (;), kwargs...) 
where {uType, iip} alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob - u = alias_u0 ? u0 : deepcopy(u0) - u_prev = zero(u) - fu1 = evaluate_f(prob, u) - fu_prev = zero(fu1) + u = __maybe_unaliased(u0, alias_u0) + @bb u_cache = copy(u) + @bb u_cache_2 = similar(u) + fu = evaluate_f(prob, u) + @bb fu_cache_2 = zero(fu) - loss = __get_trust_region_loss(fu1) - uf, _, J, fu2, jac_cache, du, H, g = jacobian_caches(alg, f, u, p, Val(iip); + loss = __trust_region_loss(internalnorm, fu) + + uf, _, J, fu_cache, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs, linsolve_with_JᵀJ = Val(true), lininit = Val(false)) - g = _restructure(fu1, g) - linsolve = u isa Number ? nothing : linsolve_caches(J, fu2, du, p, alg) + linsolve = linsolve_caches(J, fu_cache, du, p, alg) - u_tmp = zero(u) - u_cauchy = zero(u) - u_gauss_newton = _mutable_zero(u) + @bb u_cache_2 = similar(u) + @bb u_cauchy = similar(u) + @bb u_gauss_newton = similar(u) + @bb J_cache = similar(J) + @bb lr_mul_cache = similar(du) loss_new = loss shrink_counter = 0 - fu_new = zero(fu1) make_new_J = true r = loss @@ -270,11 +272,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, trustType = floatType if radius_update_scheme == RadiusUpdateSchemes.NLsolve max_trust_radius = convert(trustType, Inf) - initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType) + initial_trust_radius = internalnorm(u0) > 0 ? convert(trustType, internalnorm(u0)) : + one(trustType) else max_trust_radius = convert(trustType, alg.max_trust_radius) if iszero(max_trust_radius) - max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u))) + max_trust_radius = convert(trustType, + max(internalnorm(fu), maximum(u) - minimum(u))) end initial_trust_radius = convert(trustType, alg.initial_trust_radius) if iszero(initial_trust_radius) @@ -293,6 +297,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, p3 = convert(floatType, 0.0) p4 = convert(floatType, 0.0) ϵ = convert(floatType, 1.0e-8) + jvp_operator = nothing if radius_update_scheme === RadiusUpdateSchemes.NLsolve p1 = convert(floatType, 0.5) elseif radius_update_scheme === RadiusUpdateSchemes.Hei @@ -311,16 +316,9 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, p1 = convert(floatType, 2.0) # μ p2 = convert(floatType, 1 / 6) # c5 p3 = convert(floatType, 6.0) # c6 - if iip - auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1) - else - if isa(u, Number) - g = ForwardDiff.derivative(x -> f(x, p), u) - else - g = auto_jacvec(x -> f(x, p), u, fu1) - end - end - initial_trust_radius = convert(trustType, p1 * norm(g)) + jvp_operator = __jacvec(uf, u; fu, autodiff = __get_nonsparse_ad(alg.ad)) + @bb Jᵀf = jvp_operator × fu + initial_trust_radius = convert(trustType, p1 * internalnorm(Jᵀf)) elseif radius_update_scheme === RadiusUpdateSchemes.Fan step_threshold = convert(trustType, 0.0001) shrink_threshold = convert(trustType, 0.25) @@ -329,7 +327,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, p2 = convert(floatType, 0.25) # c5 p3 = convert(floatType, 12.0) # c6 p4 = convert(floatType, 1.0e18) # M - initial_trust_radius = convert(trustType, p1 * (norm(fu1)^0.99)) + initial_trust_radius = convert(trustType, p1 * (internalnorm(fu)^0.99)) elseif radius_update_scheme === RadiusUpdateSchemes.Bastin step_threshold = convert(trustType, 0.05) shrink_threshold = convert(trustType, 0.05) @@ -339,25 +337,25 @@ function 
SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, initial_trust_radius = convert(trustType, 1.0) end - abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu1, u, + abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u, termination_condition) - trace = init_nonlinearsolve_trace(alg, u, fu1, ApplyArray(__zero, J), du; kwargs...) + trace = init_nonlinearsolve_trace(alg, u, fu, ApplyArray(__zero, J), du; kwargs...) - return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu1, fu2, p, uf, linsolve, J, + return TrustRegionCache{iip}(f, alg, u, u_cache, u_cache_2, u_gauss_newton, u_cauchy, + fu, fu_cache, fu_cache_2, J, J_cache, JᵀJ, Jᵀf, p, uf, du, lr_mul_cache, linsolve, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold, shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new, - H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r, - p1, p2, p3, p4, ϵ, NLStats(1, 0, 0, 0, 0), tc_cache, trace) + shrink_counter, make_new_J, r, p1, p2, p3, p4, ϵ, jvp_operator, + NLStats(1, 0, 0, 0, 0), tc_cache, trace) end function perform_step!(cache::TrustRegionCache{iip}) where {iip} if cache.make_new_J cache.J = jacobian!!(cache.J, cache) - __update_JᵀJ!(Val{iip}(), cache, :H, cache.J) - __update_Jᵀf!(Val{iip}(), cache, :g, :H, cache.J, _vec(cache.fu)) - cache.stats.njacs += 1 + __update_JᵀJ!(cache) + __update_Jᵀf!(cache) # do not use A = cache.H, b = _vec(cache.g) since it is equivalent # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular @@ -374,7 +372,7 @@ function perform_step!(cache::TrustRegionCache{iip}) where {iip} # compute the potentially new u @bb @. cache.u_cache_2 = cache.u + cache.du - evaluate_f(cache, cache.u_tmp, cache.p, Val{:fu_cache_2}()) + evaluate_f(cache, cache.u_cache_2, cache.p, Val{:fu_cache_2}()) trust_region_step!(cache) cache.stats.nsolve += 1 cache.stats.nfactors += 1 @@ -383,278 +381,157 @@ end function retrospective_step!(cache::TrustRegionCache{iip}) where {iip} J = jacobian!!(cache.J_cache, cache) - __update_JᵀJ!(Val{iip}(), cache, :H, J) - __update_Jᵀf!(Val{iip}(), cache, :g, :H, J, cache.fu) - cache.stats.njacs += 1 + __update_JᵀJ!(cache, J) + __update_Jᵀf!(cache, J) - # FIXME: Caching in __lr_mul - num = __get_trust_region_loss(cache.fu) - __get_trust_region_loss(cache.fu_cache) - denom = dot(_vec(du), _vec(g)) + __lr_mul(Val{iip}(), H, _vec(du)) / 2 + num = __trust_region_loss(cache, cache.fu) - + __get_trust_region_loss(cache, cache.fu_cache) + denom = dot(_vec(cache.du), _vec(cache.Jᵀf)) + __lr_mul(cache, cache.JᵀJ, cache.du) / 2 return num / denom end -# TODO function trust_region_step!(cache::TrustRegionCache) - @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache - - cache.loss_new = __get_trust_region_loss(fu_new) + cache.loss_new = __trust_region_loss(cache, cache.fu_cache_2) # Compute the ratio of the actual reduction to the predicted reduction. 
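# With loss(u) = ‖f(u)‖^2 / 2 and the Gauss-Newton model m(δ) = ‖f(u) + Jδ‖^2 / 2, the
# ratio computed below is r = (loss(u) - loss(u + δ)) / (m(0) - m(δ)). A standalone sketch
# of that quantity for an out-of-place `f` and a dense `J`; the names are illustrative
# only, not part of the cache API.
using LinearAlgebra
function trust_region_ratio_sketch(f, u, δ, J)
    loss(x) = norm(f(x))^2 / 2
    g = J' * f(u)                                        # gradient of the loss at u
    predicted = -(dot(δ, g) + dot(δ, J' * (J * δ)) / 2)  # m(0) - m(δ)
    return (loss(u) - loss(u + δ)) / predicted
end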
- cache.r = -(loss - cache.loss_new) / - (dot(_vec(du), _vec(g)) + __lr_mul(Val(isinplace(cache)), H, _vec(du)) / 2) - @unpack r = cache + cache.r = -(cache.loss - cache.loss_new) / + (dot(_vec(cache.du), _vec(cache.Jᵀf)) + + __lr_mul(cache, cache.JᵀJ, _vec(cache.du)) / 2) + + @unpack r, radius_update_scheme = cache + make_new_J = false + if r > cache.step_threshold + take_step!(cache) + cache.loss = cache.loss_new + make_new_J = true + end if radius_update_scheme === RadiusUpdateSchemes.Simple - # Update the trust region radius. if r < cache.shrink_threshold cache.trust_r *= cache.shrink_factor cache.shrink_counter += 1 else cache.shrink_counter = 0 - end - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - - # Update the trust region radius. - if r > cache.expand_threshold - cache.trust_r = min(cache.expand_factor * cache.trust_r, max_trust_r) + if r > cache.step_threshold && r > cache.expand_threshold + cache.trust_r = min(cache.expand_factor * cache.trust_r, cache.max_trust_r) end - - cache.make_new_J = true - else - # No need to make a new J, no step was taken, so we try again with a smaller trust_r - cache.make_new_J = false end - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve - # accept/reject decision - if r > cache.step_threshold # accept - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - else # reject - cache.make_new_J = false - end - - # trust region update - if r < 1 // 10 # cache.shrink_threshold - cache.trust_r *= 1 // 2 # cache.shrink_factor - elseif r >= 9 // 10 # cache.expand_threshold - cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) - elseif r >= 1 // 2 # cache.p1 - cache.trust_r = max(cache.trust_r, 2 * norm(cache.du)) # cache.expand_factor * norm(cache.du)) + if r < 1 // 10 + cache.shrink_counter += 1 + cache.trust_r *= 1 // 2 + else + cache.shrink_counter = 0 + if r ≥ 9 // 10 + cache.trust_r = 2 * cache.internalnorm(cache.du) + elseif r ≥ 1 // 2 + cache.trust_r = max(cache.trust_r, 2 * cache.internalnorm(cache.du)) + end end - - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - # convergence test - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - elseif radius_update_scheme === RadiusUpdateSchemes.NocedalWright - # accept/reject decision - if r > cache.step_threshold # accept - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - else # reject - cache.make_new_J = false - end - if r < 1 // 4 - cache.trust_r = (1 // 4) * norm(cache.du) - elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r) / cache.trust_r < 1e-6 - cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r) - end - - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - # convergence test - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - - elseif radius_update_scheme === RadiusUpdateSchemes.Hei - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true + cache.shrink_counter += 1 + cache.trust_r = (1 // 4) * cache.internalnorm(cache.du) else - cache.make_new_J = false + cache.shrink_counter = 0 + if r > 3 // 4 && + abs(cache.internalnorm(cache.du) - cache.trust_r) < 1e-6 * cache.trust_r + cache.trust_r = min(2 * cache.trust_r, 
cache.max_trust_r) + end end - # Hei's radius update scheme + elseif radius_update_scheme === RadiusUpdateSchemes.Hei @unpack shrink_threshold, p1, p2, p3, p4 = cache - if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) < - cache.trust_r + tr_new = __rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) + if tr_new < cache.trust_r cache.shrink_counter += 1 else cache.shrink_counter = 0 end - cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) * - cache.internalnorm(du) - - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - cache.internalnorm(g) < cache.ϵ && (cache.force_stop = true) + cache.trust_r = tr_new + cache.internalnorm(cache.Jᵀf) < cache.ϵ && (cache.force_stop = true) elseif radius_update_scheme === RadiusUpdateSchemes.Yuan if r < cache.shrink_threshold cache.p1 = cache.p2 * cache.p1 cache.shrink_counter += 1 - elseif r >= cache.expand_threshold && - cache.internalnorm(du) > cache.trust_r / 2 - cache.p1 = cache.p3 * cache.p1 - cache.shrink_counter = 0 - end - - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true else - cache.make_new_J = false + if r ≥ cache.expand_threshold && + cache.internalnorm(cache.du) > cache.trust_r / 2 + cache.p1 = cache.p3 * cache.p1 + end + cache.shrink_counter = 0 end - @unpack p1 = cache - # TODO: Use the `vjp_autodiff` to for the jvp - cache.trust_r = p1 * cache.internalnorm(jvp!(cache)) + @bb cache.Jᵀf = cache.jvp_operator × vec(cache.fu) + cache.trust_r = cache.p1 * cache.internalnorm(cache.Jᵀf) - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - cache.internalnorm(g) < cache.ϵ && (cache.force_stop = true) - #Fan's update scheme + cache.internalnorm(cache.Jᵀf) < cache.ϵ && (cache.force_stop = true) elseif radius_update_scheme === RadiusUpdateSchemes.Fan if r < cache.shrink_threshold cache.p1 *= cache.p2 cache.shrink_counter += 1 - elseif r > cache.expand_threshold - cache.p1 = min(cache.p1 * cache.p3, cache.p4) - cache.shrink_counter = 0 - end - - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true else - cache.make_new_J = false + cache.shrink_counter = 0 + r > cache.expand_threshold && (cache.p1 = min(cache.p1 * cache.p3, cache.p4)) end - - @unpack p1 = cache - cache.trust_r = p1 * (cache.internalnorm(cache.fu)^0.99) - - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - cache.internalnorm(g) < cache.ϵ && (cache.force_stop = true) + cache.trust_r = cache.p1 * (cache.internalnorm(cache.fu)^0.99) + cache.internalnorm(cache.Jᵀf) < cache.ϵ && (cache.force_stop = true) elseif radius_update_scheme === RadiusUpdateSchemes.Bastin if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - if retrospective_step!(cache) >= cache.expand_threshold + if retrospective_step!(cache) ≥ cache.expand_threshold cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r) end - + cache.shrink_counter = 0 else - cache.make_new_J = false cache.trust_r *= cache.p2 cache.shrink_counter += 1 end - - update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, - @~(cache.u.-cache.u_prev)) - 
check_and_update!(cache, cache.fu, cache.u, cache.u_prev) - end -end - -# TODO -function dogleg!(cache::TrustRegionCache{true}) - @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache - - # Take the full Gauss-Newton step if lies within the trust region. - if norm(u_gauss_newton) ≤ trust_r - cache.du .= u_gauss_newton - return end - # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region - l_grad = norm(cache.g) # length of the gradient - d_cauchy = l_grad^3 / __lr_mul(Val{true}(), cache.H, _vec(cache.g)) # distance of the cauchy point from the current iterate - if d_cauchy >= trust_r - @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region - return - end - - # Take the intersection of dogleg with trust region if Cauchy point lies inside the trust region - @. u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point - @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation - - a = dot(u_tmp, u_tmp) - b = 2 * dot(u_cauchy, u_tmp) - c = d_cauchy^2 - trust_r^2 - aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues - τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary - - @. cache.du = u_cauchy + τ * u_tmp + update_trace!(cache.trace, cache.stats.nsteps + 1, cache.u, cache.fu, cache.J, + @~(cache.u.-cache.u_cache)) + check_and_update!(cache, cache.fu, cache.u, cache.u_cache) end -# TODO -function dogleg!(cache::TrustRegionCache{false}) - @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache - +function dogleg!(cache::TrustRegionCache{iip}) where {iip} # Take the full Gauss-Newton step if lies within the trust region. - if norm(u_gauss_newton) ≤ trust_r - cache.du = deepcopy(u_gauss_newton) + if cache.internalnorm(cache.u_gauss_newton) ≤ cache.trust_r + @bb copyto!(cache.du, cache.u_gauss_newton) return end - ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region - l_grad = norm(cache.g) - d_cauchy = l_grad^3 / __lr_mul(Val{false}(), cache.H, _vec(cache.g)) # distance of the cauchy point from the current iterate - if d_cauchy > trust_r # cauchy point lies outside of trust region - cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region + # Take intersection of steepest descent direction and trust region if Cauchy point lies + # outside of trust region + l_grad = cache.internalnorm(cache.Jᵀf) # length of the gradient + d_cauchy = l_grad^3 / __lr_mul(cache) + if d_cauchy ≥ cache.trust_r + # step to the end of the trust region + @bb @. cache.du = -(cache.trust_r / l_grad) * cache.Jᵀf return end - # Take the intersection of dogleg with trust region if Cauchy point lies inside the trust region - u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point - u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg - a = dot(u_tmp, u_tmp) - b = 2 * dot(u_cauchy, u_tmp) - c = d_cauchy^2 - trust_r^2 - aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues - τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary - - cache.du = u_cauchy + τ * u_tmp + # Take the intersection of dogleg with trust region if Cauchy point lies inside the + # trust region + @bb @. cache.u_cauchy = -(d_cauchy / l_grad) * cache.Jᵀf # compute Cauchy point + @bb @. 
cache.u_cache_2 = cache.u_gauss_newton - cache.u_cauchy # calf of the dogleg + + a = dot(cache.u_cache_2, cache.u_cache_2) + b = 2 * dot(cache.u_cauchy, cache.u_cache_2) + c = d_cauchy^2 - cache.trust_r^2 + # technically guaranteed to be non-negative but hedging against floating point issues + aux = max(b^2 - 4 * a * c, 0) + # stepsize along dogleg to trust region boundary + τ = (-b + sqrt(aux)) / (2 * a) + + @bb @. cache.du = cache.u_cauchy + τ * cache.u_cache_2 + return end -function __take_step!(cache::TrustRegionCache) +function take_step!(cache::TrustRegionCache) @bb copyto!(cache.u_cache, cache.u) - @bb copyto!(cache.u, cache.u_cache_2) # u_tmp --> u_cache_2 + @bb copyto!(cache.u, cache.u_cache_2) @bb copyto!(cache.fu_cache, cache.fu) - @bb copyto!(cache.fu, cache.fu_cache_2) # fu_new --> fu_cache_2 -end - -# TODO -function jvp!(cache::TrustRegionCache{false}) - @unpack f, u, fu, uf = cache - if isa(u, Number) - return value_derivative(uf, u) - end - return auto_jacvec(uf, u, fu) -end - -function jvp!(cache::TrustRegionCache{true}) - @unpack g, f, u, fu, uf = cache - if isa(u, Number) - return value_derivative(uf, u) - end - auto_jacvec!(g, uf, u, fu) - return g + @bb copyto!(cache.fu, cache.fu_cache_2) end function not_terminated(cache::TrustRegionCache) @@ -670,8 +547,9 @@ function not_terminated(cache::TrustRegionCache) return true end +# FIXME: Update the JacVec Operator for Yuan function __reinit_internal!(cache::TrustRegionCache; kwargs...) - cache.loss = __get_trust_region_loss(cache.fu) + cache.loss = __trust_region_loss(cache, cache.fu) cache.shrink_counter = 0 cache.trust_r = convert(eltype(cache.u), ifelse(cache.alg.initial_trust_radius == 0, cache.alg.initial_trust_radius, @@ -680,4 +558,13 @@ function __reinit_internal!(cache::TrustRegionCache; kwargs...) return nothing end -__get_trust_region_loss(fu) = norm(fu)^2 / 2 +# This only holds for 2-norm? 
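# With the default `internalnorm = Base.Fix2(norm, 2)`, the loss defined below is the
# classical Gauss-Newton objective, whose gradient is Jᵀf and whose model Hessian is JᵀJ,
# exactly the quantities this cache carries; for other norms these identities need not
# hold, hence the question above. A standalone check of the gradient identity, assuming a
# dense out-of-place `f` (`loss_sketch`, `f_demo`, `u_demo` are illustrative only):
using LinearAlgebra, ForwardDiff
loss_sketch(f, u) = norm(f(u))^2 / 2
f_demo(u) = u .^ 2 .- 2.0
u_demo = [1.0, 2.0]
ForwardDiff.gradient(u -> loss_sketch(f_demo, u), u_demo) ≈
    ForwardDiff.jacobian(f_demo, u_demo)' * f_demo(u_demo)  # true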
+__trust_region_loss(cache::TrustRegionCache, x) = __trust_region_loss(cache.internalnorm, x) +__trust_region_loss(nf::F, x) where {F} = nf(x)^2 / 2 + +# R-function for adaptive trust region method +function __rfunc(r::R, c2::R, M::R, γ1::R, γ2::R, β::R) where {R <: Real} + return ifelse(r ≥ c2, + (2 * (M - 1 - γ2) * atan(r - c2) + (1 + γ2)) / R(π), + (1 - γ1 - β) * (exp(r - c2) + β / (1 - γ1 - β))) +end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index e19771ef7..4d8496015 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -151,14 +151,6 @@ function wrapprecs(_Pl, _Pr, weight) return Pl, Pr end -function rfunc(r::R, c2::R, M::R, γ1::R, γ2::R, β::R) where {R <: Real} # R-function for adaptive trust region method - if (r ≥ c2) - return (2 * (M - 1 - γ2) * atan(r - c2) + (1 + γ2)) / π - else - return (1 - γ1 - β) * (exp(r - c2) + β / (1 - γ1 - β)) - end -end - concrete_jac(_) = nothing concrete_jac(::AbstractNewtonAlgorithm{CJ}) where {CJ} = CJ From cefe5b02a9f71e43d5aef447d137a99668dc22e0 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 22:47:39 -0500 Subject: [PATCH 19/25] Most 23 test problems now pass --- src/trustRegion.jl | 7 +++---- test/23_test_problems.jl | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/trustRegion.jl b/src/trustRegion.jl index f27259d3f..9ed243d26 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -384,8 +384,7 @@ function retrospective_step!(cache::TrustRegionCache{iip}) where {iip} __update_JᵀJ!(cache, J) __update_Jᵀf!(cache, J) - num = __trust_region_loss(cache, cache.fu) - - __get_trust_region_loss(cache, cache.fu_cache) + num = __trust_region_loss(cache, cache.fu) - __trust_region_loss(cache, cache.fu_cache) denom = dot(_vec(cache.du), _vec(cache.Jᵀf)) + __lr_mul(cache, cache.JᵀJ, cache.du) / 2 return num / denom end @@ -441,7 +440,7 @@ function trust_region_step!(cache::TrustRegionCache) end elseif radius_update_scheme === RadiusUpdateSchemes.Hei @unpack shrink_threshold, p1, p2, p3, p4 = cache - tr_new = __rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) + tr_new = __rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(cache.du) if tr_new < cache.trust_r cache.shrink_counter += 1 else @@ -479,7 +478,7 @@ function trust_region_step!(cache::TrustRegionCache) elseif radius_update_scheme === RadiusUpdateSchemes.Bastin if r > cache.step_threshold if retrospective_step!(cache) ≥ cache.expand_threshold - cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r) + cache.trust_r = max(cache.p1 * cache.internalnorm(cache.du), cache.trust_r) end cache.shrink_counter = 0 else diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 741402057..7642f1ed6 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -59,8 +59,8 @@ end broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [6, 11, 21] broken_tests[alg_ops[2]] = [6, 11, 21] - broken_tests[alg_ops[3]] = [1, 6, 11, 12, 15, 16, 21] - broken_tests[alg_ops[4]] = [1, 6, 8, 11, 15, 16, 21, 22] + broken_tests[alg_ops[3]] = [6, 11, 21] + broken_tests[alg_ops[4]] = [6, 11, 21] broken_tests[alg_ops[5]] = [6, 21] broken_tests[alg_ops[6]] = [6, 21] From ee15d8076ba497c152b608b3adb6ca8d54fa9953 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 4 Dec 2023 23:48:31 -0500 Subject: [PATCH 20/25] Fix most tests --- Project.toml | 4 +++- src/NonlinearSolve.jl | 5 +++-- src/dfsane.jl | 5 +++-- src/jacobian.jl | 27 +++++++++++++++++++-------- src/trustRegion.jl | 3 ++- 
src/utils.jl | 2 +- 6 files changed, 31 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 9385b14a2..b4977a080 100644 --- a/Project.toml +++ b/Project.toml @@ -60,6 +60,7 @@ LeastSquaresOptim = "0.8" LineSearches = "7" LinearAlgebra = "<0.0.1, 1" LinearSolve = "2.12" +MaybeInplace = "0.1" NaNMath = "1" NonlinearProblemLibrary = "0.1" Pkg = "1" @@ -71,7 +72,7 @@ Reexport = "0.2, 1" SafeTestsets = "0.1" SciMLBase = "2.9" SciMLOperators = "0.3" -SimpleNonlinearSolve = "1" # FIXME: Don't update the version in this PR. Using it to test +SimpleNonlinearSolve = "1" SparseArrays = "<0.0.1, 1" SparseDiffTools = "2.14" StaticArrays = "1" @@ -98,6 +99,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 278667790..c6b4fca66 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -17,6 +17,7 @@ import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_work import ConcreteStructs: @concrete import EnumX: @enumx import FastBroadcast: @.. + import FiniteDiff import ForwardDiff import ForwardDiff: Dual import LinearSolve: ComposePreconditioner, InvPreconditioner, needs_concrete_A @@ -56,7 +57,7 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c cache.p = p if iip recursivecopy!(get_u(cache), u0) - cache.f(cache.fu1, get_u(cache), p) + cache.f(get_fu(cache), get_u(cache), p) else cache.u = __maybe_unaliased(u0, alias_u0) set_fu!(cache, cache.f(cache.u, p)) @@ -76,7 +77,7 @@ function SciMLBase.reinit!(cache::AbstractNonlinearSolveCache{iip}, u0 = get_u(c if hasfield(typeof(cache), :ls_cache) # TODO: A more efficient way to do this - cache.ls_cache = init_linesearch_cache(cache.prob, cache.alg.linesearch, cache.f, + cache.ls_cache = init_linesearch_cache(cache.alg.linesearch, cache.f, get_u(cache), p, get_fu(cache), Val(iip)) end diff --git a/src/dfsane.jl b/src/dfsane.jl index 570dd7ccd..689c24485 100644 --- a/src/dfsane.jl +++ b/src/dfsane.jl @@ -55,6 +55,7 @@ Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_ end @concrete mutable struct DFSaneCache{iip} <: AbstractNonlinearSolveCache{iip} + f alg u u_cache @@ -110,8 +111,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args. termination_condition) trace = init_nonlinearsolve_trace(alg, u, fu, nothing, du; kwargs...) - return DFSaneCache{iip}(alg, u, u_cache, u_cache_2, fu, fu_cache, du, history, f_norm, - f_norm_0, alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), + return DFSaneCache{iip}(prob.f, alg, u, u_cache, u_cache_2, fu, fu_cache, du, history, + f_norm, f_norm_0, alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min), T(alg.τ_max), alg.n_exp, prob.p, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace) end diff --git a/src/jacobian.jl b/src/jacobian.jl index cd84b5d1d..2174fbc8e 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -209,10 +209,14 @@ function __concrete_vjp_autodiff(vjp_autodiff, jvp_autodiff, uf) end # jvp fallback scalar -__jacvec(args...; kwargs...) 
= JacVec(args...; kwargs...) -function __jacvec(uf, u::Number; autodiff, kwargs...) - @assert autodiff isa AutoForwardDiff "Only ForwardDiff is currently supported." - return JVPScalar(uf, u, autodiff) +function __jacvec(uf, u; autodiff, kwargs...) + if !(autodiff isa AutoForwardDiff || autodiff isa AutoFiniteDiff) + _ad = autodiff + autodiff = ifelse(ForwardDiff.can_dual(eltype(u)), AutoForwardDiff(), + AutoFiniteDiff()) + @warn "$(_ad) not supported for JacVec. Using $(autodiff) instead." + end + return u isa Number ? JVPScalar(uf, u, autodiff) : JacVec(uf, u; autodiff, kwargs...) end @concrete mutable struct JVPScalar @@ -221,10 +225,17 @@ end autodiff end -function Base.:*(jvp::JVPScalar, v) - T = typeof(ForwardDiff.Tag(typeof(jvp.uf), typeof(jvp.u))) - out = jvp.uf(ForwardDiff.Dual{T}(jvp.u, v)) - return ForwardDiff.extract_derivative(T, out) +function Base.:*(jvp::JVPScalar, v::Number) + if jvp.autodiff isa AutoForwardDiff + T = typeof(ForwardDiff.Tag(typeof(jvp.uf), typeof(jvp.u))) + out = jvp.uf(ForwardDiff.Dual{T}(jvp.u, v)) + return ForwardDiff.extract_derivative(T, out) + elseif jvp.autodiff isa AutoFiniteDiff + J = FiniteDiff.finite_difference_derivative(jvp.uf, jvp.u, jvp.autodiff.fdtype) + return J * v + else + error("Only ForwardDiff & FiniteDiff is currently supported.") + end end # Generic Handling of Krylov Methods for Normal Form Linear Solves diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 9ed243d26..9087b0d53 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -255,7 +255,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, @bb u_cache_2 = similar(u) @bb u_cauchy = similar(u) @bb u_gauss_newton = similar(u) - @bb J_cache = similar(J) + J_cache = J isa SciMLOperators.AbstractSciMLOperator || + setindex_trait(J) === CannotSetindex() ? 
J : similar(J) @bb lr_mul_cache = similar(du) loss_new = loss diff --git a/src/utils.jl b/src/utils.jl index 4d8496015..56a976aa8 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -178,7 +178,7 @@ function evaluate_f(prob::Union{NonlinearProblem{uType, iip}, return fu end -function evaluate_f(f::F, u, p, ::Val{iip}; fu = nothing) where {F, iip <: Bool} +function evaluate_f(f::F, u, p, ::Val{iip}; fu = nothing) where {F, iip} if iip f(fu, u, p) return fu From ba26318289cda6f198b070029bd8f5a93c6a6fd1 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 5 Dec 2023 10:44:22 -0500 Subject: [PATCH 21/25] Run formatter --- src/pseudotransient.jl | 2 +- src/trustRegion.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index dfaf80180..2849e0a28 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -110,7 +110,7 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} inv_α = inv(cache.alpha) if cache.J isa SciMLOperators.AbstractSciMLOperator - A = cache.J - inv_α * I + A = cache.J - inv_α * I elseif setindex_trait(cache.J) === CanSetindex() if fast_scalar_indexing(cache.J) @inbounds for i in axes(cache.J, 1) diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 9087b0d53..738066bd2 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -567,4 +567,4 @@ function __rfunc(r::R, c2::R, M::R, γ1::R, γ2::R, β::R) where {R <: Real} return ifelse(r ≥ c2, (2 * (M - 1 - γ2) * atan(r - c2) + (1 + γ2)) / R(π), (1 - γ1 - β) * (exp(r - c2) + β / (1 - γ1 - β))) -end \ No newline at end of file +end From 5def9122d90231c9230ebbc722acbdb8465aae7a Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 5 Dec 2023 12:23:05 -0500 Subject: [PATCH 22/25] Fix all tests --- Project.toml | 4 ++-- src/broyden.jl | 2 +- src/jacobian.jl | 2 +- src/lbroyden.jl | 4 ++-- src/levenberg.jl | 22 ++++++++++------------ src/trustRegion.jl | 17 ++++++++++++----- test/23_test_problems.jl | 4 ++-- test/gpu.jl | 18 ++++++------------ test/infeasible.jl | 15 +++++---------- test/matrix_resizing.jl | 5 +++-- test/polyalgs.jl | 6 +++--- 11 files changed, 47 insertions(+), 52 deletions(-) diff --git a/Project.toml b/Project.toml index b4977a080..cec099ef5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "2.9.0" +version = "2.10.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -72,7 +72,7 @@ Reexport = "0.2, 1" SafeTestsets = "0.1" SciMLBase = "2.9" SciMLOperators = "0.3" -SimpleNonlinearSolve = "1" +SimpleNonlinearSolve = "0.1.23" SparseArrays = "<0.0.1, 1" SparseDiffTools = "2.14" StaticArrays = "1" diff --git a/src/broyden.jl b/src/broyden.jl index 8b271d16c..c545ae0c2 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -116,7 +116,7 @@ function perform_step!(cache::GeneralBroydenCache{iip}) where {iip} @bb cache.u_cache = transpose(cache.J⁻¹) × vec(cache.du) denom = dot(cache.du, cache.J⁻¹dfu) @bb @. 
cache.du = (cache.du - cache.J⁻¹dfu) / ifelse(iszero(denom), T(1e-5), denom) - @bb cache.J⁻¹ += vec(cache.du) × transpose(cache.u_cache) + @bb cache.J⁻¹ += vec(cache.du) × transpose(_vec(cache.u_cache)) end @bb copyto!(cache.fu_cache, cache.fu) diff --git a/src/jacobian.jl b/src/jacobian.jl index 2174fbc8e..60be3f3cd 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -262,6 +262,6 @@ function __lr_mul(cache::AbstractNonlinearSolveCache, JᵀJ::KrylovJᵀJ, Jᵀf) return dot(_vec(Jᵀf), _vec(cache.lr_mul_cache)) end function __lr_mul(cache::AbstractNonlinearSolveCache, JᵀJ, Jᵀf) - @bb cache.lr_mul_cache = JᵀJ × Jᵀf + @bb cache.lr_mul_cache = JᵀJ × vec(Jᵀf) return dot(_vec(Jᵀf), _vec(cache.lr_mul_cache)) end diff --git a/src/lbroyden.jl b/src/lbroyden.jl index 34668e5c8..c4c73e11e 100644 --- a/src/lbroyden.jl +++ b/src/lbroyden.jl @@ -187,7 +187,7 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x) x_ = vec(x) xᵀU_ = view(xᵀU, 1:η) @bb xᵀU_ = transpose(U) × x_ - @bb y = transpose(Vᵀ) × xᵀU_ + @bb y = transpose(Vᵀ) × vec(xᵀU_) @bb @. y -= x return y end @@ -202,7 +202,7 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x) x_ = vec(x) Vᵀx_ = view(Vᵀx, 1:η) @bb Vᵀx_ = Vᵀ × x_ - @bb y = U × Vᵀx_ + @bb y = U × vec(Vᵀx_) @bb @. y -= x return y end diff --git a/src/levenberg.jl b/src/levenberg.jl index 9463a7c34..0013be126 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -224,7 +224,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip}, @bb u_cache = copy(u) @bb u_cache_2 = similar(u) @bb fu_cache_2 = similar(fu) - Jv = J * v + Jv = J * _vec(v) @bb v_cache = zero(v) return LevenbergMarquardtCache{iip, fastls}(f, alg, u, u_cache, u_cache_2, fu, fu_cache, @@ -265,16 +265,15 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, end linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, b = cache.rhs_tmp, linu = _vec(cache.v), cache.p, reltol = cache.abstol) - cache.linsolve = linres.cache - @bb @. cache.v = -linres.u else @bb cache.u_cache_2 = transpose(cache.J) × cache.fu @bb @. cache.mat_tmp = cache.JᵀJ + cache.λ * cache.DᵀD linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), b = _vec(cache.u_cache_2), linu = _vec(cache.v), cache.p, reltol = cache.abstol) - cache.linsolve = linres.cache - @bb @. cache.v = -linres.u end + cache.linsolve = linres.cache + linu = _restructure(cache.v, linres.u) + @bb @. cache.v = -linu update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), cache.J, cache.v) @@ -285,9 +284,9 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, # The following lines do: cache.a = -cache.mat_tmp \ cache.fu_tmp # NOTE: Don't pass `A`` in again, since we want to reuse the previous solve - @bb cache.Jv = cache.J × cache.v - @bb @. cache.fu_cache_2 = (2 / cache.h) * - ((cache.fu_cache_2 - cache.fu) / cache.h - cache.Jv) + @bb cache.Jv = cache.J × vec(cache.v) + Jv = _restructure(cache.fu_cache_2, cache.Jv) + @bb @. cache.fu_cache_2 = (2 / cache.h) * ((cache.fu_cache_2 - cache.fu) / cache.h - Jv) if fastls if setindex_trait(cache.rhs_tmp) === CanSetindex() cache.rhs_tmp[1:length(cache.fu)] .= _vec(cache.fu_cache_2) @@ -296,15 +295,14 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, end linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.a), cache.p, reltol = cache.abstol) - cache.linsolve = linres.cache - @bb @. 
cache.a = -linres.u else @bb cache.u_cache_2 = transpose(cache.J) × cache.fu_cache_2 linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_cache_2), linu = _vec(cache.a), cache.p, reltol = cache.abstol) - cache.linsolve = linres.cache - @bb @. cache.a = -linres.u end + cache.linsolve = linres.cache + linu = _restructure(cache.a, linres.u) + @bb @. cache.a = -linu cache.stats.nsolve += 2 cache.stats.nfactors += 2 diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 738066bd2..abc93fd9a 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -504,15 +504,16 @@ function dogleg!(cache::TrustRegionCache{iip}) where {iip} # outside of trust region l_grad = cache.internalnorm(cache.Jᵀf) # length of the gradient d_cauchy = l_grad^3 / __lr_mul(cache) + g = _restructure(cache.du, cache.Jᵀf) if d_cauchy ≥ cache.trust_r # step to the end of the trust region - @bb @. cache.du = -(cache.trust_r / l_grad) * cache.Jᵀf + @bb @. cache.du = -(cache.trust_r / l_grad) * g return end # Take the intersection of dogleg with trust region if Cauchy point lies inside the # trust region - @bb @. cache.u_cauchy = -(d_cauchy / l_grad) * cache.Jᵀf # compute Cauchy point + @bb @. cache.u_cauchy = -(d_cauchy / l_grad) * g # compute Cauchy point @bb @. cache.u_cache_2 = cache.u_gauss_newton - cache.u_cauchy # calf of the dogleg a = dot(cache.u_cache_2, cache.u_cache_2) @@ -547,13 +548,19 @@ function not_terminated(cache::TrustRegionCache) return true end -# FIXME: Update the JacVec Operator for Yuan +# FIXME: Reinit `JᵀJ` operator if `p` is changed function __reinit_internal!(cache::TrustRegionCache; kwargs...) + if cache.jvp_operator !== nothing + cache.jvp_operator = __jacvec(cache.uf, cache.u; cache.fu, + autodiff = __get_nonsparse_ad(cache.alg.ad)) + @bb cache.Jᵀf = cache.jvp_operator × cache.fu + end cache.loss = __trust_region_loss(cache, cache.fu) + cache.loss_new = cache.loss cache.shrink_counter = 0 cache.trust_r = convert(eltype(cache.u), - ifelse(cache.alg.initial_trust_radius == 0, cache.alg.initial_trust_radius, - cache.max_trust_r / 11)) + ifelse(cache.alg.initial_trust_radius == 0, cache.max_trust_r / 11, + cache.alg.initial_trust_radius)) cache.make_new_J = true return nothing end diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 7642f1ed6..58c08bb90 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -95,10 +95,10 @@ end alg_ops = (GeneralBroyden(; max_resets = 10),) broken_tests = Dict(alg => Int[] for alg in alg_ops) - broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 11, 12, 13, 14] + broken_tests[alg_ops[1]] = [1, 4, 5, 6, 11, 12, 13, 14] skip_tests = Dict(alg => Int[] for alg in alg_ops) - skip_tests[alg_ops[1]] = [22] + skip_tests[alg_ops[1]] = [2, 22] test_on_library(problems, dicts, alg_ops, broken_tests; skip_tests) end diff --git a/test/gpu.jl b/test/gpu.jl index daeee0c58..c314f4d76 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -6,28 +6,22 @@ A = cu(rand(4, 4)) u0 = cu(rand(4)) b = cu(rand(4)) -function f(du, u, p) - du .= A * u .+ b -end +linear_f(du, u, p) = (du .= A * u .+ b) -prob = NonlinearProblem(f, u0) +prob = NonlinearProblem(linear_f, u0) -# TrustRegion is broken -# LimitedMemoryBroyden will diverge! 
for alg in (NewtonRaphson(), LevenbergMarquardt(; linsolve = QRFactorization()), PseudoTransient(; alpha_initial = 1.0f0), GeneralKlement(), GeneralBroyden(), - LimitedMemoryBroyden()) + LimitedMemoryBroyden(), TrustRegion()) @test_nowarn sol = solve(prob, alg; abstol = 1.0f-8, reltol = 1.0f-8) end -f(u, p) = A * u .+ b +linear_f(u, p) = A * u .+ b -prob = NonlinearProblem{false}(f, u0) +prob = NonlinearProblem{false}(linear_f, u0) -# TrustRegion is broken -# LimitedMemoryBroyden will diverge! for alg in (NewtonRaphson(), LevenbergMarquardt(; linsolve = QRFactorization()), PseudoTransient(; alpha_initial = 1.0f0), GeneralKlement(), GeneralBroyden(), - LimitedMemoryBroyden()) + LimitedMemoryBroyden(), TrustRegion()) @test_nowarn sol = solve(prob, alg; abstol = 1.0f-8, reltol = 1.0f-8) end diff --git a/test/infeasible.jl b/test/infeasible.jl index db5d31f1b..74ec4128e 100644 --- a/test/infeasible.jl +++ b/test/infeasible.jl @@ -56,15 +56,10 @@ end @test all(!isnan, sol.u) @test !SciMLBase.successful_retcode(sol.retcode) - try - u0 = @SVector [0.0, 0.0, 0.0] - prob = NonlinearProblem(f1, u0) - sol = solve(prob) + u0 = @SVector [0.0, 0.0, 0.0] + prob = NonlinearProblem(f1, u0) + sol = solve(prob) - @test all(!isnan, sol.u) - @test !SciMLBase.successful_retcode(sol.retcode) - catch err - # Static Arrays has different default linearsolve which throws an error - @test err isa SingularException - end + @test all(!isnan, sol.u) + @test !SciMLBase.successful_retcode(sol.retcode) end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index 1d9462fa1..59a537ace 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -8,7 +8,7 @@ prob = NonlinearProblem(ff, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), - LimitedMemoryBroyden()) + LimitedMemoryBroyden(; threshold = 2)) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end @@ -19,6 +19,7 @@ vecprob = NonlinearProblem(fiip, vec(u0), p) prob = NonlinearProblem(fiip, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement()) + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), + LimitedMemoryBroyden(; threshold = 2)) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end diff --git a/test/polyalgs.jl b/test/polyalgs.jl index 0a4e599b3..e56bb5353 100644 --- a/test/polyalgs.jl +++ b/test/polyalgs.jl @@ -46,15 +46,15 @@ sol = solve(prob; abstol = 1e-9) # https://github.com/SciML/NonlinearSolve.jl/issues/187 # If we use a General Nonlinear Solver the solution might go out of the domain! 
-ff(u, p) = 0.5 / 1.5 * NaNMath.log.(u ./ (1.0 .- u)) .- 2.0 * u .+ 1.0 +ff_interval(u, p) = 0.5 / 1.5 * NaNMath.log.(u ./ (1.0 .- u)) .- 2.0 * u .+ 1.0 uspan = (0.02, 0.1) -prob = IntervalNonlinearProblem(ff, uspan) +prob = IntervalNonlinearProblem(ff_interval, uspan) sol = solve(prob; abstol = 1e-9) @test SciMLBase.successful_retcode(sol) u0 = 0.06 p = 2.0 -prob = NonlinearProblem(ff, u0, p) +prob = NonlinearProblem(ff_interval, u0, p) sol = solve(prob; abstol = 1e-9) @test SciMLBase.successful_retcode(sol) From ee042973bf6fd56fdb4a569a4c1aa4dd95665173 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 5 Dec 2023 13:49:30 -0500 Subject: [PATCH 23/25] Count statistics inside calls and not in individual algorithms --- src/gaussnewton.jl | 5 +---- src/klement.jl | 5 +---- src/levenberg.jl | 16 +++++++--------- src/pseudotransient.jl | 5 +---- src/raphson.jl | 5 +---- src/trustRegion.jl | 4 +--- src/utils.jl | 20 +++++++++++++++++--- 7 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl index 9a227a7fa..822b0ffc3 100644 --- a/src/gaussnewton.jl +++ b/src/gaussnewton.jl @@ -123,7 +123,7 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} A, b = cache.J, _vec(cache.fu) end - linres = dolinsolve(cache.alg.precs, cache.linsolve; A, b, linu = _vec(cache.du), + linres = dolinsolve(cache, cache.alg.precs, cache.linsolve; A, b, linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache cache.du = _restructure(cache.du, linres.u) @@ -142,9 +142,6 @@ function perform_step!(cache::GaussNewtonCache{iip}) where {iip} @bb copyto!(cache.u_cache, cache.u) @bb copyto!(cache.dfu, cache.fu) - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 return nothing end diff --git a/src/klement.jl b/src/klement.jl index 62aa8f681..da34958fe 100644 --- a/src/klement.jl +++ b/src/klement.jl @@ -123,7 +123,7 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} A = ifelse(cache.J isa SMatrix || cache.J isa Number || !fact_done, cache.J, nothing) # u = u - J \ fu - linres = dolinsolve(alg.precs, cache.linsolve; A, + linres = dolinsolve(cache, alg.precs, cache.linsolve; A, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache cache.du = _restructure(cache.du, linres.u) @@ -139,9 +139,6 @@ function perform_step!(cache::GeneralKlementCache{iip}) where {iip} @bb copyto!(cache.u_cache, cache.u) - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - cache.force_stop && return nothing # Update the Jacobian diff --git a/src/levenberg.jl b/src/levenberg.jl index 0013be126..160406f66 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -263,13 +263,14 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, else cache.rhs_tmp = _vcat(_vec(cache.fu), zero(_vec(cache.u))) end - linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, + linres = dolinsolve(cache, alg.precs, linsolve; A = cache.mat_tmp, b = cache.rhs_tmp, linu = _vec(cache.v), cache.p, reltol = cache.abstol) else @bb cache.u_cache_2 = transpose(cache.J) × cache.fu @bb @. 
cache.mat_tmp = cache.JᵀJ + cache.λ * cache.DᵀD - linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp), - b = _vec(cache.u_cache_2), linu = _vec(cache.v), cache.p, reltol = cache.abstol) + linres = dolinsolve(cache, alg.precs, linsolve; + A = __maybe_symmetric(cache.mat_tmp), b = _vec(cache.u_cache_2), + linu = _vec(cache.v), cache.p, reltol = cache.abstol) end cache.linsolve = linres.cache linu = _restructure(cache.v, linres.u) @@ -293,20 +294,17 @@ function perform_step!(cache::LevenbergMarquardtCache{iip, fastls}) where {iip, else cache.rhs_tmp = _vcat(_vec(cache.fu_cache_2), zero(_vec(cache.u))) end - linres = dolinsolve(alg.precs, linsolve; b = cache.rhs_tmp, linu = _vec(cache.a), - cache.p, reltol = cache.abstol) + linres = dolinsolve(cache, alg.precs, linsolve; b = cache.rhs_tmp, + linu = _vec(cache.a), cache.p, reltol = cache.abstol) else @bb cache.u_cache_2 = transpose(cache.J) × cache.fu_cache_2 - linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_cache_2), + linres = dolinsolve(cache, alg.precs, linsolve; b = _vec(cache.u_cache_2), linu = _vec(cache.a), cache.p, reltol = cache.abstol) end cache.linsolve = linres.cache linu = _restructure(cache.a, linres.u) @bb @. cache.a = -linu - cache.stats.nsolve += 2 - cache.stats.nfactors += 2 - # Require acceptable steps to satisfy the following condition. norm_v = cache.internalnorm(cache.v) if 2 * cache.internalnorm(cache.a) ≤ cache.α_geodesic * norm_v diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 2849e0a28..1416cc4b8 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -127,7 +127,7 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} end # u = u - J \ fu - linres = dolinsolve(alg.precs, cache.linsolve; A, b = _vec(cache.fu), + linres = dolinsolve(cache, alg.precs, cache.linsolve; A, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache cache.du = _restructure(cache.du, linres.u) @@ -145,9 +145,6 @@ function perform_step!(cache::PseudoTransientCache{iip}) where {iip} check_and_update!(cache, cache.fu, cache.u, cache.u_cache) @bb copyto!(cache.u_cache, cache.u) - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 return nothing end diff --git a/src/raphson.jl b/src/raphson.jl index baf2ec10c..9ba6319aa 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -104,7 +104,7 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} cache.J = jacobian!!(cache.J, cache) # u = u - J \ fu - linres = dolinsolve(alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), + linres = dolinsolve(cache, alg.precs, cache.linsolve; A = cache.J, b = _vec(cache.fu), linu = _vec(cache.du), cache.p, reltol = cache.abstol) cache.linsolve = linres.cache cache.du = _restructure(cache.du, linres.u) @@ -119,8 +119,5 @@ function perform_step!(cache::NewtonRaphsonCache{iip}) where {iip} check_and_update!(cache, cache.fu, cache.u, cache.u_cache) @bb copyto!(cache.u_cache, cache.u) - cache.stats.njacs += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 return nothing end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index abc93fd9a..524aa8694 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -360,7 +360,7 @@ function perform_step!(cache::TrustRegionCache{iip}) where {iip} # do not use A = cache.H, b = _vec(cache.g) since it is equivalent # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular - linres = dolinsolve(cache.alg.precs, cache.linsolve, A = cache.J, 
+ linres = dolinsolve(cache, cache.alg.precs, cache.linsolve, A = cache.J, b = _vec(cache.fu), linu = _vec(cache.u_gauss_newton), p = cache.p, reltol = cache.abstol) cache.linsolve = linres.cache @@ -375,8 +375,6 @@ function perform_step!(cache::TrustRegionCache{iip}) where {iip} @bb @. cache.u_cache_2 = cache.u + cache.du evaluate_f(cache, cache.u_cache_2, cache.p, Val{:fu_cache_2}()) trust_region_step!(cache) - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 return nothing end diff --git a/src/utils.jl b/src/utils.jl index 56a976aa8..99eda8807 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -88,18 +88,26 @@ end DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing -function dolinsolve(precs::P, linsolve::FakeLinearSolveJLCache; A = nothing, +function dolinsolve(cache, precs::P, linsolve::FakeLinearSolveJLCache; A = nothing, linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} + # Update Statistics + cache.stats.nsolve += 1 + cache.stats.nfactors += !(A isa Number) + A !== nothing && (linsolve.A = A) b !== nothing && (linsolve.b = b) linres = linsolve.A \ linsolve.b return FakeLinearSolveJLResult(linsolve, linres) end -function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing, +function dolinsolve(cache, precs::P, linsolve; A = nothing, linu = nothing, b = nothing, du = nothing, p = nothing, weight = nothing, cachedata = nothing, reltol = nothing, reuse_A_if_factorization = false) where {P} + # Update Statistics + cache.stats.nsolve += 1 + cache.stats.nfactors += 1 + # Some Algorithms would reuse factorization but it causes the cache to not reset in # certain cases if A !== nothing @@ -108,10 +116,16 @@ function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing (alg isa LinearSolve.DefaultLinearSolver && !(alg == LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES))) # Factorization Algorithm - !reuse_A_if_factorization && (linsolve.A = A) + if reuse_A_if_factorization + cache.stats.nfactors -= 1 + else + linsolve.A = A + end else linsolve.A = A end + else + cache.stats.nfactors -= 1 end b !== nothing && (linsolve.b = b) linu !== nothing && (linsolve.u = linu) From 8666e05a922699a20e085815ba926fbb1ebe223a Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 5 Dec 2023 17:59:51 -0500 Subject: [PATCH 24/25] Counter for jacobians --- src/jacobian.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/jacobian.jl b/src/jacobian.jl index 60be3f3cd..1ab1ff2b7 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -34,6 +34,7 @@ jacobian!!(J, _) = J # and we don't want wasteful `copyto!` function jacobian!!(J::Union{AbstractMatrix{<:Number}, Nothing}, cache) @unpack f, uf, u, p, jac_cache, alg, fu_cache = cache + cache.stats.njacs += 1 iip = isinplace(cache) if iip if has_jac(f) @@ -53,8 +54,10 @@ function jacobian!!(J::Union{AbstractMatrix{<:Number}, Nothing}, cache) end end # Scalar case -jacobian!!(::Number, cache) = last(value_derivative(cache.uf, cache.u)) - +function jacobian!!(::Number, cache) + cache.stats.njacs += 1 + return last(value_derivative(cache.uf, cache.u)) +end # Build Jacobian Caches function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f::F, u, p, ::Val{iip}; linsolve_kwargs = (;), lininit::Val{linsolve_init} = Val(true), From 3b52a5accf37028a1f32f1ce2664ddca7cb255bb Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 6 Dec 2023 11:07:26 
-0500 Subject: [PATCH 25/25] More tests pass --- src/trustRegion.jl | 2 +- test/23_test_problems.jl | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 524aa8694..5a6360f73 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -236,7 +236,7 @@ end function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing, - termination_condition = nothing, internalnorm = Base.Fix2(norm, 2), + termination_condition = nothing, internalnorm = DEFAULT_NORM, linsolve_kwargs = (;), kwargs...) where {uType, iip} alg = get_concrete_algorithm(alg_, prob) @unpack f, u0, p = prob diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 58c08bb90..035bb130c 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -75,7 +75,7 @@ end broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [3, 6, 11, 17, 21] broken_tests[alg_ops[2]] = [3, 6, 11, 17, 21] - broken_tests[alg_ops[3]] = [6, 11, 17, 21] + broken_tests[alg_ops[3]] = [6, 11, 21] test_on_library(problems, dicts, alg_ops, broken_tests) end @@ -89,8 +89,6 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end -# Broyden and Klement Tests are quite flaky and failure seems to be platform dependent -# needs additional investigation before we can enable them @testset "GeneralBroyden 23 Test Problems" begin alg_ops = (GeneralBroyden(; max_resets = 10),)