From d20441fa87a6d24bc7c46619c05775b1b24fd935 Mon Sep 17 00:00:00 2001 From: "sonson.nguyen@mail.utoronto.ca" Date: Wed, 2 Apr 2025 18:43:27 -0400 Subject: [PATCH 1/4] feat: add hager zhang line search --- src/hager_zhang.jl | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/hager_zhang.jl diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl new file mode 100644 index 0000000..e69de29 From a0e1704e85eb465bd7abab329a50795541e2f600 Mon Sep 17 00:00:00 2001 From: "sonson.nguyen@mail.utoronto.ca" Date: Wed, 2 Apr 2025 21:33:08 -0400 Subject: [PATCH 2/4] finished porting hager-zhang --- src/hager_zhang.jl | 269 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl index e69de29..a76126b 100644 --- a/src/hager_zhang.jl +++ b/src/hager_zhang.jl @@ -0,0 +1,269 @@ +@concrete struct HagerZhang <: AbstractLineSearchAlgorithm + autodiff + c1 + c2 + rho + epsilon + gamma + psi3 + maxstep + initial_alpha + maxiters::Int +end + +function HagerZhang(; autodiff=nothing, c1=0.1, c2=0.9, rho=5.0, + epsilon=1e-6, gamma=0.66, psi3=0.1, maxstep=Inf, + initial_alpha=true, maxiters=50) + return HagerZhang(autodiff, c1, c2, rho, epsilon, gamma, psi3, maxstep, initial_alpha, maxiters) +end + + +@concrete mutable struct HagerZhangCache <: AbstractLineSearchCache + # Problem and function data + f + p + ϕ # closure: computes objective φ(α) + ϕdϕ # closure: computes (φ(α), φ'(α)) + deriv_op + u_cache + fu_cache + # History of steps in current line search + alphas::Vector # step lengths tried + values::Vector # φ values at those steps + slopes::Vector # φ' (derivative) values at those steps + # Step control + alpha # current step size (floating T) + initial_alpha # initial step size (saved for reinit) + # Stats and back-reference + stats::Union{SciMLBase.NLStats, Nothing} + alg::HagerZhang + maxiters::Int +end + + +function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang, + fu, u; stats=nothing, autodiff=nothing, kwargs...) + # 1. Determine numeric type + T = promote_type(eltype(fu), eltype(u)) + # 2. Choose autodiff backend and construct derivative operator + autodiff = autodiff !== nothing ? autodiff : alg.autodiff + _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff) + # 3. Allocate cache vectors for trial state + @bb u_cache = similar(u) + @bb fu_cache = similar(fu) + # 4. Define closures for φ(α) and φ+dφ(α) + ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin + @bb @. u_cache = u + α * du + fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) + SciMLBase.add_nf!(stats) # increment function eval count + return norm(fu_cache)^2 / 2 # objective value + end + ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin + @bb @. u_cache = u + α * du + fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) + SciMLBase.add_nf!(stats) + # Compute directional derivative via AD or analytic Jacobian: + deriv = deriv_op(du, u_cache, fu_cache, p) + obj = norm(fu_cache)^2 / 2 + return obj, deriv # (φ, φ') + end + # 5. Initial step size α (respect maxstep and initial_alpha) + u_norm = norm(u, Inf) + α0 = if u_norm == 0 + one(T) # if current u is zero-vector, use step = 1 + else + alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha) + end + α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T)))) + # 6. Initialize cache and return + return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op, + u_cache, fu_cache, + Vector{T}(), Vector{T}(), Vector{T}(), + α0, α0, stats, alg, alg.maxiters) +end + +function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang, + fu, u; stats=nothing, autodiff=nothing, kwargs...) + # 1. Determine numeric type + T = promote_type(eltype(fu), eltype(u)) + # 2. Choose autodiff backend and construct derivative operator + autodiff = autodiff !== nothing ? autodiff : alg.autodiff + _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff) + # 3. Allocate cache vectors for trial state + @bb u_cache = similar(u) + @bb fu_cache = similar(fu) + # 4. Define closures for φ(α) and φ+dφ(α) + ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin + @bb @. u_cache = u + α * du + fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) + SciMLBase.add_nf!(stats) # increment function eval count + return norm(fu_cache)^2 / 2 # objective value + end + ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin + @bb @. u_cache = u + α * du + fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) + SciMLBase.add_nf!(stats) + # Compute directional derivative via AD or analytic Jacobian: + deriv = deriv_op(du, u_cache, fu_cache, p) + obj = norm(fu_cache)^2 / 2 + return obj, deriv # (φ, φ') + end + # 5. Initial step size α (respect maxstep and initial_alpha) + u_norm = norm(u, Inf) + α0 = if u_norm == 0 + one(T) # if current u is zero-vector, use step = 1 + else + alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha) + end + α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T)))) + # 6. Initialize cache and return + return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op, + u_cache, fu_cache, + Vector{T}(), Vector{T}(), Vector{T}(), + α0, α0, stats, alg, alg.maxiters) +end + + +function CommonSolve.solve!(cache::HagerZhangCache, u, du) + T = promote_type(eltype(u), eltype(du)) + # φ0 and dφ0 at alpha = 0 + φ0, dφ0 = cache.ϕdϕ(cache.f, cache.p, u, du, zero(T), + cache.u_cache, cache.fu_cache, cache.deriv_op) + if !(isfinite(φ0) && isfinite(dφ0)) + return LineSearchSolution(zero(T), ReturnCode.Failure) # non-finite baseline + end + if dφ0 >= 0 + return LineSearchSolution(zero(T), ReturnCode.ConvergenceFailure) # not descent + end + # Initialize history + empty!(cache.alphas); empty!(cache.values); empty!(cache.slopes) + push!(cache.alphas, zero(T)); push!(cache.values, φ0); push!(cache.slopes, dφ0) + # Initial trial step + local α = cache.alpha # initial guess from cache (already of type T) + α = min(α, T(cache.alg.maxstep)) # enforce maxstep + if α <= eps(T) + return LineSearchSolution(zero(T), ReturnCode.Success) # step too small (no move) + end + # Evaluate at initial α + φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, + cache.u_cache, cache.fu_cache, cache.deriv_op) + # Ensure finite by shrinking via psi3 + iter_finite = 0 + while !(isfinite(φ_α) && isfinite(dφ_α)) && iter_finite < cache.alg.maxiters + α *= T(cache.alg.psi3) # reduce step + φ_α, dφ_α = cache.ϕdϕ(..., α, ...) # reevaluate + iter_finite += 1 + end + if !(isfinite(φ_α) && isfinite(dφ_α)) + # Could not find a finite φ even after reducing step + return LineSearchSolution(zero(T), ReturnCode.Failure) + end + push!(cache.alphas, α); push!(cache.values, φ_α); push!(cache.slopes, dφ_α) + # Now have two points: index1 (0) and index2 (α) + phi_lim = φ0 + T(cache.alg.epsilon) * abs(φ0) + ia = 1; ib = 2 + # Bracketing loop + local cold = zero(T); local φ_cold = φ0 + local is_bracketed = false + local iter = 1 + while !is_bracketed && iter < cache.maxiters + # Current end point is index ib (the last pushed point) + α = cache.alphas[ib]; φ_α = cache.values[ib]; dφ_α = cache.slopes[ib] + if dφ_α >= zero(T) + # Slope non-negative: bracket found between ia and ib + ib = length(cache.alphas) + # choose ia as last index <= phi_lim going backwards + ia = 1 + for i in (ib-1):-1:1 + if cache.values[i] <= phi_lim + ia = i + break + end + end + is_bracketed = true + elseif φ_α > phi_lim + # Function value increased beyond phi_lim, slope still negative: crest scenario + ib = length(cache.alphas) + ia = 1 + # Bisect between ia and ib to find a bracket + (ia, ib) = bisect!(cache, ia, ib, phi_lim) # This will evaluate mid-points and update history + is_bracketed = true + else + # Still going downhill and φ not increased significantly: expand further + cold = α; φ_cold = φ_α # save last good point + if nextfloat(cold) >= T(cache.alg.maxstep) + # Reached maximum step effectively + return LineSearchSolution(cold, ReturnCode.Success) + end + # Propose new α = α * rho + α = min(cold * T(cache.alg.rho), T(cache.alg.maxstep)) + φ_α, dφ_α = cache.ϕdϕ(..., α, ...) + # Check finite again, possibly bisect if not finite + if !(isfinite(φ_α) && isfinite(dφ_α)) + cache.alg.maxstep = α # shrink maxstep to current α + # bisect between cold and α until finite or limit + local α_hi = α; local α_lo = cold + local it_f = 1 + while !(isfinite(φ_α) && isfinite(dφ_α)) && it_f < cache.alg.maxiters && α_hi > nextfloat(α_lo) + α = (α_lo + α_hi)/2 + φ_α, dφ_α = cache.ϕdϕ(..., α, ...) + if isfinite(φ_α) && isfinite(dφ_α) + break + end + α_hi = α # shrink upper bound + it_f += 1 + end + if !(isfinite(φ_α) && isfinite(dφ_α)) + # Failed to find finite in bracket + return LineSearchSolution(cold, ReturnCode.Failure) + end + end + # Append this new point and continue loop + push!(cache.alphas, α); push!(cache.values, φ_α); push!(cache.slopes, dφ_α) + ib = length(cache.alphas) + end + iter += 1 + end # end bracketing loop + + if !is_bracketed + # Bracketing failed within maxiters + return LineSearchSolution(cache.alphas[end], ReturnCode.Failure) + end + + # Now have bracket [ia, ib] with ia < ib + while iter < cache.maxiters + # Current bracket interval + local a = cache.alphas[ia]; local b = cache.alphas[ib] + if b - a <= eps(b) + return LineSearchSolution(a, ReturnCode.Success) # interval too small + end + # Interpolation step to find trial between a and b + local (is_wolfe, i_new, j_new) = secant2!(cache, ia, ib, phi_lim) + # secant2! will: + # - evaluate φ and φ' at new trial alpha within (a,b) + # - add new trial to cache.alphas/values/slopes + # - return true/false if Wolfe met, and updated bracket indices. + if is_wolfe + # Wolfe conditions satisfied at new trial + local idx = i_new # index of the accepted point + return LineSearchSolution(cache.alphas[idx], ReturnCode.Success) + else + # Not yet satisfied: update bracket and continue + ia = i_new; ib = j_new + end + iter += 1 + end + + # If we exit loop by exceeding iterations: + return LineSearchSolution(cache.alphas[ia], ReturnCode.Failure) +end + +function SciMLBase.reinit!(cache::HagerZhangCache; p=cache.p, stats=cache.stats) + cache.p = p + cache.stats = stats + cache.alpha = cache.initial_alpha + empty!(cache.alphas) + empty!(cache.values) + empty!(cache.slopes) + return cache +end From a7818b85e586e0cf052272626faa117f585dc438 Mon Sep 17 00:00:00 2001 From: "sonson.nguyen@mail.utoronto.ca" Date: Wed, 2 Apr 2025 21:34:01 -0400 Subject: [PATCH 3/4] added new LineSearch.jl --- src/LineSearch.jl | 91 +++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/src/LineSearch.jl b/src/LineSearch.jl index 57d3f0e..a3f7307 100644 --- a/src/LineSearch.jl +++ b/src/LineSearch.jl @@ -1,43 +1,48 @@ -module LineSearch - -using ADTypes: ADTypes -using CommonSolve: CommonSolve -using ConcreteStructs: @concrete -using FastClosures: @closure -using LinearAlgebra: norm, dot -using MaybeInplace: @bb -using SciMLBase: SciMLBase, AbstractSciMLProblem, AbstractNonlinearProblem, ReturnCode, - NonlinearProblem, NonlinearLeastSquaresProblem, NonlinearFunction -using SciMLJacobianOperators: VecJacOperator, JacVecOperator -using StaticArraysCore: SArray - -abstract type AbstractLineSearchAlgorithm end -abstract type AbstractLineSearchCache end - -# Needed for certain algorithms like RobustNonMonotoneLineSearch -function callback_into_cache!(::AbstractLineSearchCache, _) end - -# By default, reinit! does nothing -function SciMLBase.reinit!(::AbstractLineSearchCache; kwargs...) end - -include("utils.jl") - -include("backtracking.jl") -include("li_fukushima.jl") -include("no_search.jl") -include("robust_non_monotone.jl") - -include("line_searches_ext.jl") - -@concrete struct LineSearchSolution - step_size - retcode::ReturnCode.T -end - -export LineSearchSolution - -export BackTracking -export NoLineSearch, LiFukushimaLineSearch, RobustNonMonotoneLineSearch -export LineSearchesJL - -end +module LineSearch + +using ADTypes: ADTypes +using CommonSolve: CommonSolve +using ConcreteStructs: @concrete +using FastClosures: @closure +using LinearAlgebra: norm, dot +using MaybeInplace: @bb +using SciMLBase: SciMLBase, AbstractSciMLProblem, AbstractNonlinearProblem, ReturnCode, + NonlinearProblem, NonlinearLeastSquaresProblem, NonlinearFunction +using SciMLJacobianOperators: VecJacOperator, JacVecOperator +using StaticArraysCore: SArray + +# The 2 Core abstractions of this module +# Base type for all line search algorithm +abstract type AbstractLineSearchAlgorithm end +# Base type for all algorithm-specific caching +abstract type AbstractLineSearchCache end + +# Needed for certain algorithms like RobustNonMonotoneLineSearch +function callback_into_cache!(::AbstractLineSearchCache, _) end + +# By default, reinit! does nothing +function SciMLBase.reinit!(::AbstractLineSearchCache; kwargs...) end + +include("utils.jl") + +include("backtracking.jl") +include("li_fukushima.jl") +include("no_search.jl") +include("robust_non_monotone.jl") + +include("line_searches_ext.jl") +include("hager_zhang.jl") + +@concrete struct LineSearchSolution + step_size + retcode::ReturnCode.T +end + +export LineSearchSolution + +export BackTracking +export HagerZhang +export NoLineSearch, LiFukushimaLineSearch, RobustNonMonotoneLineSearch +export LineSearchesJL + +end From b887b44bb69bb7525626a764e83b516d2ec43a57 Mon Sep 17 00:00:00 2001 From: "sonson.nguyen@mail.utoronto.ca" Date: Sun, 6 Apr 2025 21:49:33 -0400 Subject: [PATCH 4/4] added test for hager_zhang and editted hager_zhang --- src/hager_zhang.jl | 48 +----- test/custom_optimizer_tests.jl | 304 +++++++++++++++++---------------- 2 files changed, 156 insertions(+), 196 deletions(-) diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl index a76126b..95485d1 100644 --- a/src/hager_zhang.jl +++ b/src/hager_zhang.jl @@ -82,48 +82,6 @@ function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang, α0, α0, stats, alg, alg.maxiters) end -function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang, - fu, u; stats=nothing, autodiff=nothing, kwargs...) - # 1. Determine numeric type - T = promote_type(eltype(fu), eltype(u)) - # 2. Choose autodiff backend and construct derivative operator - autodiff = autodiff !== nothing ? autodiff : alg.autodiff - _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff) - # 3. Allocate cache vectors for trial state - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - # 4. Define closures for φ(α) and φ+dφ(α) - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) - SciMLBase.add_nf!(stats) # increment function eval count - return norm(fu_cache)^2 / 2 # objective value - end - ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p) - SciMLBase.add_nf!(stats) - # Compute directional derivative via AD or analytic Jacobian: - deriv = deriv_op(du, u_cache, fu_cache, p) - obj = norm(fu_cache)^2 / 2 - return obj, deriv # (φ, φ') - end - # 5. Initial step size α (respect maxstep and initial_alpha) - u_norm = norm(u, Inf) - α0 = if u_norm == 0 - one(T) # if current u is zero-vector, use step = 1 - else - alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha) - end - α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T)))) - # 6. Initialize cache and return - return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op, - u_cache, fu_cache, - Vector{T}(), Vector{T}(), Vector{T}(), - α0, α0, stats, alg, alg.maxiters) -end - - function CommonSolve.solve!(cache::HagerZhangCache, u, du) T = promote_type(eltype(u), eltype(du)) # φ0 and dφ0 at alpha = 0 @@ -151,7 +109,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du) iter_finite = 0 while !(isfinite(φ_α) && isfinite(dφ_α)) && iter_finite < cache.alg.maxiters α *= T(cache.alg.psi3) # reduce step - φ_α, dφ_α = cache.ϕdϕ(..., α, ...) # reevaluate + φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) # reevaluate iter_finite += 1 end if !(isfinite(φ_α) && isfinite(dφ_α)) @@ -197,7 +155,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du) end # Propose new α = α * rho α = min(cold * T(cache.alg.rho), T(cache.alg.maxstep)) - φ_α, dφ_α = cache.ϕdϕ(..., α, ...) + φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) # Check finite again, possibly bisect if not finite if !(isfinite(φ_α) && isfinite(dφ_α)) cache.alg.maxstep = α # shrink maxstep to current α @@ -206,7 +164,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du) local it_f = 1 while !(isfinite(φ_α) && isfinite(dφ_α)) && it_f < cache.alg.maxiters && α_hi > nextfloat(α_lo) α = (α_lo + α_hi)/2 - φ_α, dφ_α = cache.ϕdϕ(..., α, ...) + φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) if isfinite(φ_α) && isfinite(dφ_α) break end diff --git a/test/custom_optimizer_tests.jl b/test/custom_optimizer_tests.jl index 4a11f2e..81d94cd 100644 --- a/test/custom_optimizer_tests.jl +++ b/test/custom_optimizer_tests.jl @@ -1,151 +1,153 @@ -# Test based on https://julianlsolvers.github.io/LineSearches.jl/stable/examples/generated/customoptimizer.html -@testsetup module CustomOptimizer -using LinearAlgebra, SciMLBase, LineSearch, SciMLJacobianOperators - -function gradient_descent( - prob, alg; g_atol::Real = 1e-5, maxiters::Int = 10000, autodiff = nothing) - u = copy(prob.u0) - if SciMLBase.isinplace(prob) - fu = similar(u) - prob.f(fu, u, prob.p) - else - fu = prob.f(u, prob.p) - end - - ls_cache = init(prob, alg, fu, u) - vjp_op = VecJacOperator(prob, fu, u; autodiff) - - alphas = Float64[] - iter = 0 - for _ in 1:maxiters - iter += 1 - svjp_op = StatefulJacobianOperator(vjp_op, u, prob.p) - gs = svjp_op * fu .* 2 - δu = -gs - - ls_sol = solve!(ls_cache, u, δu) - - push!(alphas, ls_sol.step_size) - @. u = u + ls_sol.step_size * δu - gnorm = norm(gs) - - if SciMLBase.isinplace(prob) - fu = similar(u) - prob.f(fu, u, prob.p) - else - fu = prob.f(u, prob.p) - end - - LineSearch.callback_into_cache!(ls_cache, fu) - - gnorm < g_atol && break - end - - return fu, u, iter, alphas -end - -export gradient_descent - -end - -@testitem "LineSearches.jl: Custom Optimizer" setup=[CustomOptimizer] begin - using LineSearches, SciMLBase - using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff - - @testset "OOP Problem" begin - nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)] - nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) - - @testset for autodiff in ( - AutoTracker(), AutoForwardDiff(), AutoZygote(), - AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() - ) - @testset "method: $(nameof(typeof(method)))" for method in ( - LineSearches.BackTracking(; order = 3), - StrongWolfe(), - HagerZhang(), - MoreThuente() - ) - linesearch = LineSearchesJL(; method, autodiff) - fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff) - - @test fu≈[0.0, 0.0] atol=1e-2 - @test u≈[1.0, 1.0] atol=1e-2 - @test !all(isone, alphas) - end - end - end - - @testset "In-Place Problem" begin - nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]) - nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) - - @testset for autodiff in ( - AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() - ) - @testset "method: $(nameof(typeof(method)))" for method in ( - LineSearches.BackTracking(; order = 3), - StrongWolfe(), - HagerZhang(), - MoreThuente() - ) - linesearch = LineSearchesJL(; method, autodiff) - fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff) - - @test fu≈[0.0, 0.0] atol=1e-2 - @test u≈[1.0, 1.0] atol=1e-2 - @test !all(isone, alphas) - end - end - end -end - -@testitem "Native Line Search: Custom Optimizer" setup=[CustomOptimizer] begin - using SciMLBase - using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff - - @testset "OOP Problem" begin - nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)] - nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) - - @testset for autodiff in ( - AutoTracker(), AutoForwardDiff(), AutoZygote(), - AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() - ) - @testset "method: $(nameof(typeof(method)))" for method in ( - LiFukushimaLineSearch(), - NoLineSearch(0.001), - BackTracking(; order = Val(3), autodiff), - BackTracking(; order = Val(2), autodiff) - ) - fu, u, iter, alphas = gradient_descent(nlp, method; autodiff) - - @test fu≈[0.0, 0.0] atol=1e-1 - @test u≈[1.0, 1.0] atol=1e-1 - @test !all(isone, alphas) - end - end - end - - @testset "In-Place Problem" begin - nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]) - nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) - - @testset for autodiff in ( - AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() - ) - @testset "method: $(nameof(typeof(method)))" for method in ( - LiFukushimaLineSearch(), - NoLineSearch(0.001), - BackTracking(; order = Val(3), autodiff), - BackTracking(; order = Val(2), autodiff) - ) - fu, u, iter, alphas = gradient_descent(nlp, method; autodiff) - - @test fu≈[0.0, 0.0] atol=1e-1 - @test u≈[1.0, 1.0] atol=1e-1 - @test !all(isone, alphas) - end - end - end -end +# Test based on https://julianlsolvers.github.io/LineSearches.jl/stable/examples/generated/customoptimizer.html +@testsetup module CustomOptimizer +using LinearAlgebra, SciMLBase, LineSearch, SciMLJacobianOperators + +function gradient_descent( + prob, alg; g_atol::Real = 1e-5, maxiters::Int = 10000, autodiff = nothing) + u = copy(prob.u0) + if SciMLBase.isinplace(prob) + fu = similar(u) + prob.f(fu, u, prob.p) + else + fu = prob.f(u, prob.p) + end + + ls_cache = init(prob, alg, fu, u) + vjp_op = VecJacOperator(prob, fu, u; autodiff) + + alphas = Float64[] + iter = 0 + for _ in 1:maxiters + iter += 1 + svjp_op = StatefulJacobianOperator(vjp_op, u, prob.p) + gs = svjp_op * fu .* 2 + δu = -gs + + ls_sol = solve!(ls_cache, u, δu) + + push!(alphas, ls_sol.step_size) + @. u = u + ls_sol.step_size * δu + gnorm = norm(gs) + + if SciMLBase.isinplace(prob) + fu = similar(u) + prob.f(fu, u, prob.p) + else + fu = prob.f(u, prob.p) + end + + LineSearch.callback_into_cache!(ls_cache, fu) + + gnorm < g_atol && break + end + + return fu, u, iter, alphas +end + +export gradient_descent + +end + +@testitem "LineSearches.jl: Custom Optimizer" setup=[CustomOptimizer] begin + using LineSearches, SciMLBase + using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff + + @testset "OOP Problem" begin + nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)] + nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) + + @testset for autodiff in ( + AutoTracker(), AutoForwardDiff(), AutoZygote(), + AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() + ) + @testset "method: $(nameof(typeof(method)))" for method in ( + LineSearches.BackTracking(; order = 3), + StrongWolfe(), + HagerZhang(), + MoreThuente() + ) + linesearch = LineSearchesJL(; method, autodiff) + fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff) + + @test fu≈[0.0, 0.0] atol=1e-2 + @test u≈[1.0, 1.0] atol=1e-2 + @test !all(isone, alphas) + end + end + end + + @testset "In-Place Problem" begin + nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]) + nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) + + @testset for autodiff in ( + AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() + ) + @testset "method: $(nameof(typeof(method)))" for method in ( + LineSearches.BackTracking(; order = 3), + StrongWolfe(), + HagerZhang(), + MoreThuente() + ) + linesearch = LineSearchesJL(; method, autodiff) + fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff) + + @test fu≈[0.0, 0.0] atol=1e-2 + @test u≈[1.0, 1.0] atol=1e-2 + @test !all(isone, alphas) + end + end + end +end + +@testitem "Native Line Search: Custom Optimizer" setup=[CustomOptimizer] begin + using SciMLBase + using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff + + @testset "OOP Problem" begin + nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)] + nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) + + @testset for autodiff in ( + AutoTracker(), AutoForwardDiff(), AutoZygote(), + AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() + ) + @testset "method: $(nameof(typeof(method)))" for method in ( + LiFukushimaLineSearch(), + NoLineSearch(0.001), + BackTracking(; order = Val(3), autodiff), + BackTracking(; order = Val(2), autodiff), + HagerZhang(; autodiff = autodiff) + ) + fu, u, iter, alphas = gradient_descent(nlp, method; autodiff) + + @test fu≈[0.0, 0.0] atol=1e-1 + @test u≈[1.0, 1.0] atol=1e-1 + @test !all(isone, alphas) + end + end + end + + @testset "In-Place Problem" begin + nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]) + nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0]) + + @testset for autodiff in ( + AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff() + ) + @testset "method: $(nameof(typeof(method)))" for method in ( + LiFukushimaLineSearch(), + NoLineSearch(0.001), + BackTracking(; order = Val(3), autodiff), + BackTracking(; order = Val(2), autodiff), + HagerZhang(; autodiff) + ) + fu, u, iter, alphas = gradient_descent(nlp, method; autodiff) + + @test fu≈[0.0, 0.0] atol=1e-1 + @test u≈[1.0, 1.0] atol=1e-1 + @test !all(isone, alphas) + end + end + end +end