From d20441fa87a6d24bc7c46619c05775b1b24fd935 Mon Sep 17 00:00:00 2001
From: "sonson.nguyen@mail.utoronto.ca" <sontheteacher0605@gmail.com>
Date: Wed, 2 Apr 2025 18:43:27 -0400
Subject: [PATCH 1/4] feat: add hager zhang line search

---
 src/hager_zhang.jl | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/hager_zhang.jl

diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl
new file mode 100644
index 0000000..e69de29

From a0e1704e85eb465bd7abab329a50795541e2f600 Mon Sep 17 00:00:00 2001
From: "sonson.nguyen@mail.utoronto.ca" <sontheteacher0605@gmail.com>
Date: Wed, 2 Apr 2025 21:33:08 -0400
Subject: [PATCH 2/4] finished porting hager-zhang

---
 src/hager_zhang.jl | 269 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 269 insertions(+)

diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl
index e69de29..a76126b 100644
--- a/src/hager_zhang.jl
+++ b/src/hager_zhang.jl
@@ -0,0 +1,269 @@
+@concrete struct HagerZhang <: AbstractLineSearchAlgorithm
+    autodiff
+    c1
+    c2
+    rho
+    epsilon
+    gamma
+    psi3
+    maxstep
+    initial_alpha
+    maxiters::Int
+end
+
+function HagerZhang(; autodiff=nothing, c1=0.1, c2=0.9, rho=5.0,
+                    epsilon=1e-6, gamma=0.66, psi3=0.1, maxstep=Inf,
+                    initial_alpha=true, maxiters=50)
+    return HagerZhang(autodiff, c1, c2, rho, epsilon, gamma, psi3, maxstep, initial_alpha, maxiters)
+end
+
+
+@concrete mutable struct HagerZhangCache <: AbstractLineSearchCache
+    # Problem and function data
+    f
+    p
+    ϕ        # closure: computes objective φ(α)
+    ϕdϕ      # closure: computes (φ(α), φ'(α))
+    deriv_op
+    u_cache
+    fu_cache
+    # History of steps in current line search
+    alphas::Vector      # step lengths tried
+    values::Vector      # φ values at those steps
+    slopes::Vector      # φ' (derivative) values at those steps
+    # Step control
+    alpha               # current step size (floating T)
+    initial_alpha       # initial step size (saved for reinit)
+    # Stats and back-reference
+    stats::Union{SciMLBase.NLStats, Nothing}
+    alg::HagerZhang
+    maxiters::Int
+end
+
+
+function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang,
+                          fu, u; stats=nothing, autodiff=nothing, kwargs...)
+    # 1. Determine numeric type
+    T = promote_type(eltype(fu), eltype(u))
+    # 2. Choose autodiff backend and construct derivative operator
+    autodiff = autodiff !== nothing ? autodiff : alg.autodiff
+    _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff)
+    # 3. Allocate cache vectors for trial state
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
+    # 4. Define closures for φ(α) and φ+dφ(α)
+    ϕ   = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin
+              @bb @. u_cache = u + α * du
+              fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
+              SciMLBase.add_nf!(stats)             # increment function eval count
+              return norm(fu_cache)^2 / 2          # objective value
+          end
+    ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin
+              @bb @. u_cache = u + α * du
+              fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
+              SciMLBase.add_nf!(stats)
+              # Compute directional derivative via AD or analytic Jacobian:
+              deriv = deriv_op(du, u_cache, fu_cache, p)
+              obj = norm(fu_cache)^2 / 2
+              return obj, deriv                   # (φ, φ')
+          end
+    # 5. Initial step size α (respect maxstep and initial_alpha)
+    u_norm = norm(u, Inf)
+    α0 = if u_norm == 0 
+            one(T)              # if current u is zero-vector, use step = 1
+         else 
+            alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha)
+         end
+    α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T))))
+    # 6. Initialize cache and return
+    return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op,
+                           u_cache, fu_cache,
+                           Vector{T}(), Vector{T}(), Vector{T}(),
+                           α0, α0, stats, alg, alg.maxiters)
+end
+
+function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang,
+                        fu, u; stats=nothing, autodiff=nothing, kwargs...)
+    # 1. Determine numeric type
+    T = promote_type(eltype(fu), eltype(u))
+    # 2. Choose autodiff backend and construct derivative operator
+    autodiff = autodiff !== nothing ? autodiff : alg.autodiff
+    _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff)
+    # 3. Allocate cache vectors for trial state
+    @bb u_cache = similar(u)
+    @bb fu_cache = similar(fu)
+    # 4. Define closures for φ(α) and φ+dφ(α)
+    ϕ   = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin
+            @bb @. u_cache = u + α * du
+            fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
+            SciMLBase.add_nf!(stats)             # increment function eval count
+            return norm(fu_cache)^2 / 2          # objective value
+        end
+    ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin
+            @bb @. u_cache = u + α * du
+            fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
+            SciMLBase.add_nf!(stats)
+            # Compute directional derivative via AD or analytic Jacobian:
+            deriv = deriv_op(du, u_cache, fu_cache, p)
+            obj = norm(fu_cache)^2 / 2
+            return obj, deriv                   # (φ, φ')
+        end
+    # 5. Initial step size α (respect maxstep and initial_alpha)
+    u_norm = norm(u, Inf)
+    α0 = if u_norm == 0 
+            one(T)              # if current u is zero-vector, use step = 1
+        else 
+            alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha)
+        end
+    α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T))))
+    # 6. Initialize cache and return
+    return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op,
+        u_cache, fu_cache,
+        Vector{T}(), Vector{T}(), Vector{T}(),
+        α0, α0, stats, alg, alg.maxiters)
+end
+
+
+function CommonSolve.solve!(cache::HagerZhangCache, u, du)
+    T = promote_type(eltype(u), eltype(du))
+    # φ0 and dφ0 at alpha = 0
+    φ0, dφ0 = cache.ϕdϕ(cache.f, cache.p, u, du, zero(T),
+                         cache.u_cache, cache.fu_cache, cache.deriv_op)
+    if !(isfinite(φ0) && isfinite(dφ0))
+        return LineSearchSolution(zero(T), ReturnCode.Failure)   # non-finite baseline
+    end
+    if dφ0 >= 0
+        return LineSearchSolution(zero(T), ReturnCode.ConvergenceFailure)  # not descent
+    end
+    # Initialize history
+    empty!(cache.alphas); empty!(cache.values); empty!(cache.slopes)
+    push!(cache.alphas, zero(T)); push!(cache.values, φ0); push!(cache.slopes, dφ0)
+    # Initial trial step
+    local α = cache.alpha            # initial guess from cache (already of type T)
+    α = min(α, T(cache.alg.maxstep)) # enforce maxstep
+    if α <= eps(T)
+        return LineSearchSolution(zero(T), ReturnCode.Success)   # step too small (no move)
+    end
+    # Evaluate at initial α
+    φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α,
+                           cache.u_cache, cache.fu_cache, cache.deriv_op)
+    # Ensure finite by shrinking via psi3
+    iter_finite = 0
+    while !(isfinite(φ_α) && isfinite(dφ_α)) && iter_finite < cache.alg.maxiters
+        α *= T(cache.alg.psi3)   # reduce step
+        φ_α, dφ_α = cache.ϕdϕ(..., α, ...)    # reevaluate
+        iter_finite += 1
+    end
+    if !(isfinite(φ_α) && isfinite(dφ_α))
+        # Could not find a finite φ even after reducing step
+        return LineSearchSolution(zero(T), ReturnCode.Failure)
+    end
+    push!(cache.alphas, α); push!(cache.values, φ_α); push!(cache.slopes, dφ_α)
+    # Now have two points: index1 (0) and index2 (α)
+    phi_lim = φ0 + T(cache.alg.epsilon) * abs(φ0)
+    ia = 1; ib = 2
+    # Bracketing loop
+    local cold = zero(T);  local φ_cold = φ0
+    local is_bracketed = false
+    local iter = 1
+    while !is_bracketed && iter < cache.maxiters
+        # Current end point is index ib (the last pushed point)
+        α = cache.alphas[ib];  φ_α = cache.values[ib];  dφ_α = cache.slopes[ib]
+        if dφ_α >= zero(T)
+            # Slope non-negative: bracket found between ia and ib
+            ib = length(cache.alphas)
+            # choose ia as last index <= phi_lim going backwards
+            ia = 1
+            for i in (ib-1):-1:1
+                if cache.values[i] <= phi_lim
+                    ia = i
+                    break
+                end
+            end
+            is_bracketed = true
+        elseif φ_α > phi_lim
+            # Function value increased beyond phi_lim, slope still negative: crest scenario
+            ib = length(cache.alphas)
+            ia = 1
+            # Bisect between ia and ib to find a bracket
+            (ia, ib) = bisect!(cache, ia, ib, phi_lim)  # This will evaluate mid-points and update history
+            is_bracketed = true
+        else
+            # Still going downhill and φ not increased significantly: expand further
+            cold = α;  φ_cold = φ_α   # save last good point
+            if nextfloat(cold) >= T(cache.alg.maxstep)
+                # Reached maximum step effectively
+                return LineSearchSolution(cold, ReturnCode.Success)
+            end
+            # Propose new α = α * rho
+            α = min(cold * T(cache.alg.rho), T(cache.alg.maxstep))
+            φ_α, dφ_α = cache.ϕdϕ(..., α, ...)
+            # Check finite again, possibly bisect if not finite
+            if !(isfinite(φ_α) && isfinite(dφ_α))
+                cache.alg.maxstep = α   # shrink maxstep to current α
+                # bisect between cold and α until finite or limit
+                local α_hi = α;  local α_lo = cold
+                local it_f = 1
+                while !(isfinite(φ_α) && isfinite(dφ_α)) && it_f < cache.alg.maxiters && α_hi > nextfloat(α_lo)
+                    α = (α_lo + α_hi)/2
+                    φ_α, dφ_α = cache.ϕdϕ(..., α, ...)
+                    if isfinite(φ_α) && isfinite(dφ_α)
+                        break
+                    end
+                    α_hi = α     # shrink upper bound
+                    it_f += 1
+                end
+                if !(isfinite(φ_α) && isfinite(dφ_α))
+                    # Failed to find finite in bracket
+                    return LineSearchSolution(cold, ReturnCode.Failure)
+                end
+            end
+            # Append this new point and continue loop
+            push!(cache.alphas, α); push!(cache.values, φ_α); push!(cache.slopes, dφ_α)
+            ib = length(cache.alphas)
+        end
+        iter += 1
+    end  # end bracketing loop
+
+    if !is_bracketed
+        # Bracketing failed within maxiters
+        return LineSearchSolution(cache.alphas[end], ReturnCode.Failure)
+    end
+
+    # Now have bracket [ia, ib] with ia < ib
+    while iter < cache.maxiters
+        # Current bracket interval
+        local a = cache.alphas[ia];  local b = cache.alphas[ib]
+        if b - a <= eps(b)
+            return LineSearchSolution(a, ReturnCode.Success)  # interval too small
+        end
+        # Interpolation step to find trial between a and b
+        local (is_wolfe, i_new, j_new) = secant2!(cache, ia, ib, phi_lim)
+        # secant2! will:
+        #  - evaluate φ and φ' at new trial alpha within (a,b)
+        #  - add new trial to cache.alphas/values/slopes
+        #  - return true/false if Wolfe met, and updated bracket indices.
+        if is_wolfe
+            # Wolfe conditions satisfied at new trial
+            local idx = i_new  # index of the accepted point
+            return LineSearchSolution(cache.alphas[idx], ReturnCode.Success)
+        else
+            # Not yet satisfied: update bracket and continue
+            ia = i_new;  ib = j_new
+        end
+        iter += 1
+    end
+
+    # If we exit loop by exceeding iterations:
+    return LineSearchSolution(cache.alphas[ia], ReturnCode.Failure)
+end
+
+function SciMLBase.reinit!(cache::HagerZhangCache; p=cache.p, stats=cache.stats)
+    cache.p = p
+    cache.stats = stats
+    cache.alpha = cache.initial_alpha
+    empty!(cache.alphas)
+    empty!(cache.values)
+    empty!(cache.slopes)
+    return cache
+end

From a7818b85e586e0cf052272626faa117f585dc438 Mon Sep 17 00:00:00 2001
From: "sonson.nguyen@mail.utoronto.ca" <sontheteacher0605@gmail.com>
Date: Wed, 2 Apr 2025 21:34:01 -0400
Subject: [PATCH 3/4] added new LineSearch.jl

---
 src/LineSearch.jl | 91 +++++++++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/src/LineSearch.jl b/src/LineSearch.jl
index 57d3f0e..a3f7307 100644
--- a/src/LineSearch.jl
+++ b/src/LineSearch.jl
@@ -1,43 +1,48 @@
-module LineSearch
-
-using ADTypes: ADTypes
-using CommonSolve: CommonSolve
-using ConcreteStructs: @concrete
-using FastClosures: @closure
-using LinearAlgebra: norm, dot
-using MaybeInplace: @bb
-using SciMLBase: SciMLBase, AbstractSciMLProblem, AbstractNonlinearProblem, ReturnCode,
-                 NonlinearProblem, NonlinearLeastSquaresProblem, NonlinearFunction
-using SciMLJacobianOperators: VecJacOperator, JacVecOperator
-using StaticArraysCore: SArray
-
-abstract type AbstractLineSearchAlgorithm end
-abstract type AbstractLineSearchCache end
-
-# Needed for certain algorithms like RobustNonMonotoneLineSearch
-function callback_into_cache!(::AbstractLineSearchCache, _) end
-
-# By default, reinit! does nothing
-function SciMLBase.reinit!(::AbstractLineSearchCache; kwargs...) end
-
-include("utils.jl")
-
-include("backtracking.jl")
-include("li_fukushima.jl")
-include("no_search.jl")
-include("robust_non_monotone.jl")
-
-include("line_searches_ext.jl")
-
-@concrete struct LineSearchSolution
-    step_size
-    retcode::ReturnCode.T
-end
-
-export LineSearchSolution
-
-export BackTracking
-export NoLineSearch, LiFukushimaLineSearch, RobustNonMonotoneLineSearch
-export LineSearchesJL
-
-end
+module LineSearch
+
+using ADTypes: ADTypes
+using CommonSolve: CommonSolve
+using ConcreteStructs: @concrete
+using FastClosures: @closure
+using LinearAlgebra: norm, dot
+using MaybeInplace: @bb
+using SciMLBase: SciMLBase, AbstractSciMLProblem, AbstractNonlinearProblem, ReturnCode,
+                 NonlinearProblem, NonlinearLeastSquaresProblem, NonlinearFunction
+using SciMLJacobianOperators: VecJacOperator, JacVecOperator
+using StaticArraysCore: SArray
+
+# The 2 Core abstractions of this module
+# Base type for all line search algorithm
+abstract type AbstractLineSearchAlgorithm end 
+# Base type for all algorithm-specific caching
+abstract type AbstractLineSearchCache end
+
+# Needed for certain algorithms like RobustNonMonotoneLineSearch
+function callback_into_cache!(::AbstractLineSearchCache, _) end
+
+# By default, reinit! does nothing
+function SciMLBase.reinit!(::AbstractLineSearchCache; kwargs...) end
+
+include("utils.jl")
+
+include("backtracking.jl")
+include("li_fukushima.jl")
+include("no_search.jl")
+include("robust_non_monotone.jl")
+
+include("line_searches_ext.jl")
+include("hager_zhang.jl")
+
+@concrete struct LineSearchSolution
+    step_size
+    retcode::ReturnCode.T
+end
+
+export LineSearchSolution
+
+export BackTracking
+export HagerZhang
+export NoLineSearch, LiFukushimaLineSearch, RobustNonMonotoneLineSearch
+export LineSearchesJL
+
+end

From b887b44bb69bb7525626a764e83b516d2ec43a57 Mon Sep 17 00:00:00 2001
From: "sonson.nguyen@mail.utoronto.ca" <sontheteacher0605@gmail.com>
Date: Sun, 6 Apr 2025 21:49:33 -0400
Subject: [PATCH 4/4] added test for hager_zhang and editted hager_zhang

---
 src/hager_zhang.jl             |  48 +-----
 test/custom_optimizer_tests.jl | 304 +++++++++++++++++----------------
 2 files changed, 156 insertions(+), 196 deletions(-)

diff --git a/src/hager_zhang.jl b/src/hager_zhang.jl
index a76126b..95485d1 100644
--- a/src/hager_zhang.jl
+++ b/src/hager_zhang.jl
@@ -82,48 +82,6 @@ function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang,
                            α0, α0, stats, alg, alg.maxiters)
 end
 
-function CommonSolve.init(prob::AbstractNonlinearProblem, alg::HagerZhang,
-                        fu, u; stats=nothing, autodiff=nothing, kwargs...)
-    # 1. Determine numeric type
-    T = promote_type(eltype(fu), eltype(u))
-    # 2. Choose autodiff backend and construct derivative operator
-    autodiff = autodiff !== nothing ? autodiff : alg.autodiff
-    _, _, deriv_op = construct_jvp_or_vjp_operator(prob, fu, u; autodiff)
-    # 3. Allocate cache vectors for trial state
-    @bb u_cache = similar(u)
-    @bb fu_cache = similar(fu)
-    # 4. Define closures for φ(α) and φ+dφ(α)
-    ϕ   = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin
-            @bb @. u_cache = u + α * du
-            fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
-            SciMLBase.add_nf!(stats)             # increment function eval count
-            return norm(fu_cache)^2 / 2          # objective value
-        end
-    ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin
-            @bb @. u_cache = u + α * du
-            fu_cache = SciMLBase.evaluate_f!!(f, fu_cache, u_cache, p)
-            SciMLBase.add_nf!(stats)
-            # Compute directional derivative via AD or analytic Jacobian:
-            deriv = deriv_op(du, u_cache, fu_cache, p)
-            obj = norm(fu_cache)^2 / 2
-            return obj, deriv                   # (φ, φ')
-        end
-    # 5. Initial step size α (respect maxstep and initial_alpha)
-    u_norm = norm(u, Inf)
-    α0 = if u_norm == 0 
-            one(T)              # if current u is zero-vector, use step = 1
-        else 
-            alg.initial_alpha isa Bool ? one(T) : T(alg.initial_alpha)
-        end
-    α0 = min(α0, T(alg.maxstep) / T(max(u_norm, one(T))))
-    # 6. Initialize cache and return
-    return HagerZhangCache(prob.f, prob.p, ϕ, ϕdϕ, deriv_op,
-        u_cache, fu_cache,
-        Vector{T}(), Vector{T}(), Vector{T}(),
-        α0, α0, stats, alg, alg.maxiters)
-end
-
-
 function CommonSolve.solve!(cache::HagerZhangCache, u, du)
     T = promote_type(eltype(u), eltype(du))
     # φ0 and dφ0 at alpha = 0
@@ -151,7 +109,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du)
     iter_finite = 0
     while !(isfinite(φ_α) && isfinite(dφ_α)) && iter_finite < cache.alg.maxiters
         α *= T(cache.alg.psi3)   # reduce step
-        φ_α, dφ_α = cache.ϕdϕ(..., α, ...)    # reevaluate
+        φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op)    # reevaluate
         iter_finite += 1
     end
     if !(isfinite(φ_α) && isfinite(dφ_α))
@@ -197,7 +155,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du)
             end
             # Propose new α = α * rho
             α = min(cold * T(cache.alg.rho), T(cache.alg.maxstep))
-            φ_α, dφ_α = cache.ϕdϕ(..., α, ...)
+            φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op)
             # Check finite again, possibly bisect if not finite
             if !(isfinite(φ_α) && isfinite(dφ_α))
                 cache.alg.maxstep = α   # shrink maxstep to current α
@@ -206,7 +164,7 @@ function CommonSolve.solve!(cache::HagerZhangCache, u, du)
                 local it_f = 1
                 while !(isfinite(φ_α) && isfinite(dφ_α)) && it_f < cache.alg.maxiters && α_hi > nextfloat(α_lo)
                     α = (α_lo + α_hi)/2
-                    φ_α, dφ_α = cache.ϕdϕ(..., α, ...)
+                    φ_α, dφ_α = cache.ϕdϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op)
                     if isfinite(φ_α) && isfinite(dφ_α)
                         break
                     end
diff --git a/test/custom_optimizer_tests.jl b/test/custom_optimizer_tests.jl
index 4a11f2e..81d94cd 100644
--- a/test/custom_optimizer_tests.jl
+++ b/test/custom_optimizer_tests.jl
@@ -1,151 +1,153 @@
-# Test based on https://julianlsolvers.github.io/LineSearches.jl/stable/examples/generated/customoptimizer.html
-@testsetup module CustomOptimizer
-using LinearAlgebra, SciMLBase, LineSearch, SciMLJacobianOperators
-
-function gradient_descent(
-        prob, alg; g_atol::Real = 1e-5, maxiters::Int = 10000, autodiff = nothing)
-    u = copy(prob.u0)
-    if SciMLBase.isinplace(prob)
-        fu = similar(u)
-        prob.f(fu, u, prob.p)
-    else
-        fu = prob.f(u, prob.p)
-    end
-
-    ls_cache = init(prob, alg, fu, u)
-    vjp_op = VecJacOperator(prob, fu, u; autodiff)
-
-    alphas = Float64[]
-    iter = 0
-    for _ in 1:maxiters
-        iter += 1
-        svjp_op = StatefulJacobianOperator(vjp_op, u, prob.p)
-        gs = svjp_op * fu .* 2
-        δu = -gs
-
-        ls_sol = solve!(ls_cache, u, δu)
-
-        push!(alphas, ls_sol.step_size)
-        @. u = u + ls_sol.step_size * δu
-        gnorm = norm(gs)
-
-        if SciMLBase.isinplace(prob)
-            fu = similar(u)
-            prob.f(fu, u, prob.p)
-        else
-            fu = prob.f(u, prob.p)
-        end
-
-        LineSearch.callback_into_cache!(ls_cache, fu)
-
-        gnorm < g_atol && break
-    end
-
-    return fu, u, iter, alphas
-end
-
-export gradient_descent
-
-end
-
-@testitem "LineSearches.jl: Custom Optimizer" setup=[CustomOptimizer] begin
-    using LineSearches, SciMLBase
-    using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff
-
-    @testset "OOP Problem" begin
-        nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]
-        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
-
-        @testset for autodiff in (
-            AutoTracker(), AutoForwardDiff(), AutoZygote(),
-            AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
-        )
-            @testset "method: $(nameof(typeof(method)))" for method in (
-                LineSearches.BackTracking(; order = 3),
-                StrongWolfe(),
-                HagerZhang(),
-                MoreThuente()
-            )
-                linesearch = LineSearchesJL(; method, autodiff)
-                fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff)
-
-                @test fu≈[0.0, 0.0] atol=1e-2
-                @test u≈[1.0, 1.0] atol=1e-2
-                @test !all(isone, alphas)
-            end
-        end
-    end
-
-    @testset "In-Place Problem" begin
-        nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)])
-        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
-
-        @testset for autodiff in (
-            AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
-        )
-            @testset "method: $(nameof(typeof(method)))" for method in (
-                LineSearches.BackTracking(; order = 3),
-                StrongWolfe(),
-                HagerZhang(),
-                MoreThuente()
-            )
-                linesearch = LineSearchesJL(; method, autodiff)
-                fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff)
-
-                @test fu≈[0.0, 0.0] atol=1e-2
-                @test u≈[1.0, 1.0] atol=1e-2
-                @test !all(isone, alphas)
-            end
-        end
-    end
-end
-
-@testitem "Native Line Search: Custom Optimizer" setup=[CustomOptimizer] begin
-    using SciMLBase
-    using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff
-
-    @testset "OOP Problem" begin
-        nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]
-        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
-
-        @testset for autodiff in (
-            AutoTracker(), AutoForwardDiff(), AutoZygote(),
-            AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
-        )
-            @testset "method: $(nameof(typeof(method)))" for method in (
-                LiFukushimaLineSearch(),
-                NoLineSearch(0.001),
-                BackTracking(; order = Val(3), autodiff),
-                BackTracking(; order = Val(2), autodiff)
-            )
-                fu, u, iter, alphas = gradient_descent(nlp, method; autodiff)
-
-                @test fu≈[0.0, 0.0] atol=1e-1
-                @test u≈[1.0, 1.0] atol=1e-1
-                @test !all(isone, alphas)
-            end
-        end
-    end
-
-    @testset "In-Place Problem" begin
-        nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)])
-        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
-
-        @testset for autodiff in (
-            AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
-        )
-            @testset "method: $(nameof(typeof(method)))" for method in (
-                LiFukushimaLineSearch(),
-                NoLineSearch(0.001),
-                BackTracking(; order = Val(3), autodiff),
-                BackTracking(; order = Val(2), autodiff)
-            )
-                fu, u, iter, alphas = gradient_descent(nlp, method; autodiff)
-
-                @test fu≈[0.0, 0.0] atol=1e-1
-                @test u≈[1.0, 1.0] atol=1e-1
-                @test !all(isone, alphas)
-            end
-        end
-    end
-end
+# Test based on https://julianlsolvers.github.io/LineSearches.jl/stable/examples/generated/customoptimizer.html
+@testsetup module CustomOptimizer
+using LinearAlgebra, SciMLBase, LineSearch, SciMLJacobianOperators
+
+function gradient_descent(
+        prob, alg; g_atol::Real = 1e-5, maxiters::Int = 10000, autodiff = nothing)
+    u = copy(prob.u0)
+    if SciMLBase.isinplace(prob)
+        fu = similar(u)
+        prob.f(fu, u, prob.p)
+    else
+        fu = prob.f(u, prob.p)
+    end
+
+    ls_cache = init(prob, alg, fu, u)
+    vjp_op = VecJacOperator(prob, fu, u; autodiff)
+
+    alphas = Float64[]
+    iter = 0
+    for _ in 1:maxiters
+        iter += 1
+        svjp_op = StatefulJacobianOperator(vjp_op, u, prob.p)
+        gs = svjp_op * fu .* 2
+        δu = -gs
+
+        ls_sol = solve!(ls_cache, u, δu)
+
+        push!(alphas, ls_sol.step_size)
+        @. u = u + ls_sol.step_size * δu
+        gnorm = norm(gs)
+
+        if SciMLBase.isinplace(prob)
+            fu = similar(u)
+            prob.f(fu, u, prob.p)
+        else
+            fu = prob.f(u, prob.p)
+        end
+
+        LineSearch.callback_into_cache!(ls_cache, fu)
+
+        gnorm < g_atol && break
+    end
+
+    return fu, u, iter, alphas
+end
+
+export gradient_descent
+
+end
+
+@testitem "LineSearches.jl: Custom Optimizer" setup=[CustomOptimizer] begin
+    using LineSearches, SciMLBase
+    using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff
+
+    @testset "OOP Problem" begin
+        nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]
+        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
+
+        @testset for autodiff in (
+            AutoTracker(), AutoForwardDiff(), AutoZygote(),
+            AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
+        )
+            @testset "method: $(nameof(typeof(method)))" for method in (
+                LineSearches.BackTracking(; order = 3),
+                StrongWolfe(),
+                HagerZhang(),
+                MoreThuente()
+            )
+                linesearch = LineSearchesJL(; method, autodiff)
+                fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff)
+
+                @test fu≈[0.0, 0.0] atol=1e-2
+                @test u≈[1.0, 1.0] atol=1e-2
+                @test !all(isone, alphas)
+            end
+        end
+    end
+
+    @testset "In-Place Problem" begin
+        nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)])
+        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
+
+        @testset for autodiff in (
+            AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
+        )
+            @testset "method: $(nameof(typeof(method)))" for method in (
+                LineSearches.BackTracking(; order = 3),
+                StrongWolfe(),
+                HagerZhang(),
+                MoreThuente()
+            )
+                linesearch = LineSearchesJL(; method, autodiff)
+                fu, u, iter, alphas = gradient_descent(nlp, linesearch; autodiff)
+
+                @test fu≈[0.0, 0.0] atol=1e-2
+                @test u≈[1.0, 1.0] atol=1e-2
+                @test !all(isone, alphas)
+            end
+        end
+    end
+end
+
+@testitem "Native Line Search: Custom Optimizer" setup=[CustomOptimizer] begin
+    using SciMLBase
+    using ADTypes, Tracker, ForwardDiff, Zygote, Enzyme, ReverseDiff, FiniteDiff
+
+    @testset "OOP Problem" begin
+        nlf(x, p) = [p[1] - x[1], 10.0 * (x[2] - x[1]^2)]
+        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
+
+        @testset for autodiff in (
+            AutoTracker(), AutoForwardDiff(), AutoZygote(),
+            AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
+        )
+            @testset "method: $(nameof(typeof(method)))" for method in (
+                LiFukushimaLineSearch(),
+                NoLineSearch(0.001),
+                BackTracking(; order = Val(3), autodiff),
+                BackTracking(; order = Val(2), autodiff),
+                HagerZhang(; autodiff = autodiff)
+            )
+                fu, u, iter, alphas = gradient_descent(nlp, method; autodiff)
+
+                @test fu≈[0.0, 0.0] atol=1e-1
+                @test u≈[1.0, 1.0] atol=1e-1
+                @test !all(isone, alphas)
+            end
+        end
+    end
+
+    @testset "In-Place Problem" begin
+        nlf(dx, x, p) = (dx .= [p[1] - x[1], 10.0 * (x[2] - x[1]^2)])
+        nlp = NonlinearProblem(nlf, [-1.0, 1.0], [1.0])
+
+        @testset for autodiff in (
+            AutoForwardDiff(), AutoEnzyme(), AutoReverseDiff(), AutoFiniteDiff()
+        )
+            @testset "method: $(nameof(typeof(method)))" for method in (
+                LiFukushimaLineSearch(),
+                NoLineSearch(0.001),
+                BackTracking(; order = Val(3), autodiff),
+                BackTracking(; order = Val(2), autodiff),
+                HagerZhang(; autodiff)
+            )
+                fu, u, iter, alphas = gradient_descent(nlp, method; autodiff)
+
+                @test fu≈[0.0, 0.0] atol=1e-1
+                @test u≈[1.0, 1.0] atol=1e-1
+                @test !all(isone, alphas)
+            end
+        end
+    end
+end