From f82685369638b42eb8d1b51fb1edcbdf5135606c Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen <patrick.mogensen@gmail.com>
Date: Wed, 26 Mar 2025 07:30:51 +0100
Subject: [PATCH 1/5] Add check_flatness keyword to HZ and format

---
 src/hagerzhang.jl | 306 +++++++++++++++++++++++++++++-----------------
 test/issues.jl    | 159 ++++++++++++++++++++++--
 2 files changed, 344 insertions(+), 121 deletions(-)

diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
index 9d1be56..e8688fa 100644
--- a/src/hagerzhang.jl
+++ b/src/hagerzhang.jl
@@ -44,19 +44,19 @@
 # Display flags are represented as a bitfield
 # (not exported, but can use via LineSearches.ITER, for example)
 const one64 = convert(UInt64, 1)
-const FINAL       = one64
-const ITER        = one64 << 1
-const PARAMETERS  = one64 << 2
-const GRADIENT    = one64 << 3
-const SEARCHDIR   = one64 << 4
-const ALPHA       = one64 << 5
-const BETA        = one64 << 6
+const FINAL = one64
+const ITER = one64 << 1
+const PARAMETERS = one64 << 2
+const GRADIENT = one64 << 3
+const SEARCHDIR = one64 << 4
+const ALPHA = one64 << 5
+const BETA = one64 << 6
 # const ALPHAGUESS  = one64 << 7 TODO: not needed
-const BRACKET     = one64 << 8
-const LINESEARCH  = one64 << 9
-const UPDATE      = one64 << 10
-const SECANT2     = one64 << 11
-const BISECT      = one64 << 12
+const BRACKET = one64 << 8
+const LINESEARCH = one64 << 9
+const UPDATE = one64 << 10
+const SECANT2 = one64 << 11
+const BISECT = one64 << 12
 const BARRIERCOEF = one64 << 13
 display_nextbit = 14
 
@@ -80,24 +80,32 @@ Conjugate gradient line search implementation from:
     conjugate gradient method with guaranteed descent. ACM
     Transactions on Mathematical Software 32: 113–137.
 """
-@with_kw struct HagerZhang{T, Tm} <: AbstractLineSearch
-   delta::T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
-   sigma::T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
-   alphamax::T = Inf
-   rho::T = 5.0
-   epsilon::T = 1e-6
-   gamma::T = 0.66
-   linesearchmax::Int = 50
-   psi3::T = 0.1
-   display::Int = 0
-   mayterminate::Tm = Ref{Bool}(false)
-   cache::Union{Nothing,LineSearchCache{T}} = nothing
+@with_kw struct HagerZhang{T,Tm} <: AbstractLineSearch
+    delta::T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
+    sigma::T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
+    alphamax::T = Inf
+    rho::T = 5.0
+    epsilon::T = 1e-6
+    gamma::T = 0.66
+    linesearchmax::Int = 50
+    psi3::T = 0.1
+    display::Int = 0
+    mayterminate::Tm = Ref{Bool}(false)
+    cache::Union{Nothing,LineSearchCache{T}} = nothing
+    check_flatness::Bool = false
 end
-HagerZhang{T}(args...; kwargs...) where T = HagerZhang{T, Base.RefValue{Bool}}(args...; kwargs...)
-
-function (ls::HagerZhang)(df::AbstractObjective, x::AbstractArray{T},
-                          s::AbstractArray{T}, α::Real,
-                          x_new::AbstractArray{T}, phi_0::Real, dphi_0::Real) where T
+HagerZhang{T}(args...; kwargs...) where {T} =
+    HagerZhang{T,Base.RefValue{Bool}}(args...; kwargs...)
+
+function (ls::HagerZhang)(
+    df::AbstractObjective,
+    x::AbstractArray{T},
+    s::AbstractArray{T},
+    α::Real,
+    x_new::AbstractArray{T},
+    phi_0::Real,
+    dphi_0::Real,
+) where {T}
     ϕ, ϕdϕ = make_ϕ_ϕdϕ(df, x_new, x, s)
     ls(ϕ, ϕdϕ, α::Real, phi_0, dphi_0)
 end
@@ -105,18 +113,26 @@ end
 (ls::HagerZhang)(ϕ, dϕ, ϕdϕ, c, phi_0, dphi_0) = ls(ϕ, ϕdϕ, c, phi_0, dphi_0)
 
 # TODO: Should we deprecate the interface that only uses the ϕ and ϕd\phi arguments?
-function (ls::HagerZhang)(ϕ, ϕdϕ,
-                          c::T,
-                          phi_0::Real,
-                          dphi_0::Real) where T # Should c and phi_0 be same type?
-    @unpack delta, sigma, alphamax, rho, epsilon, gamma,
-            linesearchmax, psi3, display, mayterminate, cache = ls
+function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T} # Should c and phi_0 be same type?
+    @unpack delta,
+    sigma,
+    alphamax,
+    rho,
+    epsilon,
+    gamma,
+    linesearchmax,
+    psi3,
+    display,
+    mayterminate,
+    cache = ls
     emptycache!(cache)
 
     zeroT = convert(T, 0)
     pushcache!(cache, zeroT, phi_0, dphi_0)
     if !(isfinite(phi_0) && isfinite(dphi_0))
-        throw(LineSearchException("Value and slope at step length = 0 must be finite.", T(0)))
+        throw(
+            LineSearchException("Value and slope at step length = 0 must be finite.", T(0)),
+        )
     end
     if dphi_0 >= eps(T) * abs(phi_0)
         throw(LineSearchException("Search direction is not a direction of descent.", T(0)))
@@ -163,7 +179,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     # If c was generated by quadratic interpolation, check whether it
     # satisfies the Wolfe conditions
     if mayterminate[] &&
-          satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
+       satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
         if display & LINESEARCH > 0
             println("Wolfe condition satisfied on point alpha = ", c)
         end
@@ -179,17 +195,24 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     cold = -one(T)
     while !isbracketed && iter < linesearchmax
         if display & BRACKET > 0
-            println("bracketing: ia = ", ia,
-                    ", ib = ", ib,
-                    ", c = ", c,
-                    ", phi_c = ", phi_c,
-                    ", dphi_c = ", dphi_c)
+            println(
+                "bracketing: ia = ",
+                ia,
+                ", ib = ",
+                ib,
+                ", c = ",
+                c,
+                ", phi_c = ",
+                phi_c,
+                ", dphi_c = ",
+                dphi_c,
+            )
         end
         if dphi_c >= zeroT
             # We've reached the upward slope, so we have b; examine
             # previous values to find a
             ib = length(alphas)
-            for i = (ib - 1):-1:1
+            for i = (ib-1):-1:1
                 if values[i] <= phi_lim
                     ia = i
                     break
@@ -201,7 +224,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             # have crested over the peak. Use bisection.
             ib = length(alphas)
             ia = 1
-            if c ≉  alphas[ib] || slopes[ib] >= zeroT
+            if c ≉ alphas[ib] || slopes[ib] >= zeroT
                 error("c = ", c)
             end
             # ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
@@ -226,13 +249,19 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             if c > alphamax
                 c = alphamax
                 if display & BRACKET > 0
-                    println("bracket: exceeding alphamax, using c = alphamax = ", alphamax,
-                    ", cold = ", cold)
+                    println(
+                        "bracket: exceeding alphamax, using c = alphamax = ",
+                        alphamax,
+                        ", cold = ",
+                        cold,
+                    )
                 end
             end
             phi_c, dphi_c = ϕdϕ(c)
             iterfinite = 1
-            while !(isfinite(phi_c) && isfinite(dphi_c)) && c > nextfloat(cold) && iterfinite < iterfinitemax
+            while !(isfinite(phi_c) && isfinite(dphi_c)) &&
+                      c > nextfloat(cold) &&
+                      iterfinite < iterfinitemax
                 alphamax = c # shrinks alphamax, assumes that steps >= c can never have finite phi_c and dphi_c
                 iterfinite += 1
                 if display & BRACKET > 0
@@ -243,11 +272,19 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             end
             if !(isfinite(phi_c) && isfinite(dphi_c))
                 if display & BRACKET > 0
-                    println("Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.")
-                    println("c = ", c,
-                            ", alphamax = ", alphamax,
-                            ", phi_c = ", phi_c,
-                            ", dphi_c = ", dphi_c)
+                    println(
+                        "Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.",
+                    )
+                    println(
+                        "c = ",
+                        c,
+                        ", alphamax = ",
+                        alphamax,
+                        ", phi_c = ",
+                        phi_c,
+                        ", dphi_c = ",
+                        dphi_c,
+                    )
                 end
                 return cold, phi_cold
             end
@@ -262,18 +299,27 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
         b = alphas[ib]
         @assert b > a
         if display & LINESEARCH > 0
-            println("linesearch: ia = ", ia,
-                    ", ib = ", ib,
-                    ", a = ", a,
-                    ", b = ", b,
-                    ", phi(a) = ", values[ia],
-                    ", phi(b) = ", values[ib])
+            println(
+                "linesearch: ia = ",
+                ia,
+                ", ib = ",
+                ib,
+                ", a = ",
+                a,
+                ", b = ",
+                b,
+                ", phi(a) = ",
+                values[ia],
+                ", phi(b) = ",
+                values[ib],
+            )
         end
         if b - a <= eps(b)
             mayterminate[] = false # reset in case another initial guess is used next
             return a, values[ia] # lsr.value[ia]
         end
-        iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
+        iswolfe, iA, iB =
+            secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
         if iswolfe
             mayterminate[] = false # reset in case another initial guess is used next
             return alphas[iA], values[iA] # lsr.value[iA]
@@ -285,12 +331,15 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             if display & LINESEARCH > 0
                 println("Linesearch: secant succeeded")
             end
-            if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
+            if ls.check_flatness &&
+               nextfloat(values[ia]) >= values[ib] &&
+               nextfloat(values[iA]) >= values[iB]
                 # It's so flat, secant didn't do anything useful, time to quit
                 if display & LINESEARCH > 0
                     println("Linesearch: secant suggests it's flat")
                 end
                 mayterminate[] = false # reset in case another initial guess is used next
+
                 return A, values[iA]
             end
             ia = iA
@@ -308,30 +357,44 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             push!(values, phi_c)
             push!(slopes, dphi_c)
 
-            ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
+            ia, ib = update!(
+                ϕdϕ,
+                alphas,
+                values,
+                slopes,
+                iA,
+                iB,
+                length(alphas),
+                phi_lim,
+                display,
+            )
         end
         iter += 1
     end
 
-    throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(linesearchmax).",
-                              alphas[ia]))
+    throw(
+        LineSearchException(
+            "Linesearch failed to converge, reached maximum iterations $(linesearchmax).",
+            alphas[ia],
+        ),
+    )
 
 
 end
 
 # Check Wolfe & approximate Wolfe
-function satisfies_wolfe(c::T,
-                         phi_c::Real,
-                         dphi_c::Real,
-                         phi_0::Real,
-                         dphi_0::Real,
-                         phi_lim::Real,
-                         delta::Real,
-                         sigma::Real) where T<:Number
-    wolfe1 = delta * dphi_0 >= (phi_c - phi_0) / c &&
-               dphi_c >= sigma * dphi_0
-    wolfe2 = (2 * delta - 1) * dphi_0 >= dphi_c >= sigma * dphi_0 &&
-               phi_c <= phi_lim
+function satisfies_wolfe(
+    c::T,
+    phi_c::Real,
+    dphi_c::Real,
+    phi_0::Real,
+    dphi_0::Real,
+    phi_lim::Real,
+    delta::Real,
+    sigma::Real,
+) where {T<:Number}
+    wolfe1 = delta * dphi_0 >= (phi_c - phi_0) / c && dphi_c >= sigma * dphi_0
+    wolfe2 = (2 * delta - 1) * dphi_0 >= dphi_c >= sigma * dphi_0 && phi_c <= phi_lim
     return wolfe1 || wolfe2
 end
 
@@ -343,16 +406,18 @@ function secant(alphas, values, slopes, ia::Integer, ib::Integer)
     return secant(alphas[ia], alphas[ib], slopes[ia], slopes[ib])
 end
 # phi
-function secant2!(ϕdϕ,
-                  alphas,
-                  values,
-                  slopes,
-                  ia::Integer,
-                  ib::Integer,
-                  phi_lim::Real,
-                  delta::Real = DEFAULTDELTA,
-                  sigma::Real = DEFAULTSIGMA,
-                  display::Integer = 0)
+function secant2!(
+    ϕdϕ,
+    alphas,
+    values,
+    slopes,
+    ia::Integer,
+    ib::Integer,
+    phi_lim::Real,
+    delta::Real = DEFAULTDELTA,
+    sigma::Real = DEFAULTSIGMA,
+    display::Integer = 0,
+)
     phi_0 = values[1]
     dphi_0 = slopes[1]
     a = alphas[ia]
@@ -362,9 +427,13 @@ function secant2!(ϕdϕ,
     T = eltype(slopes)
     zeroT = convert(T, 0)
     if !(dphi_a < zeroT && dphi_b >= zeroT)
-        error(string("Search direction is not a direction of descent; ",
-                     "this error may indicate that user-provided derivatives are inaccurate. ",
-                      @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b))
+        error(
+            string(
+                "Search direction is not a direction of descent; ",
+                "this error may indicate that user-provided derivatives are inaccurate. ",
+                @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b
+            ),
+        )
     end
     c = secant(a, b, dphi_a, dphi_b)
     if display & SECANT2 > 0
@@ -433,15 +502,17 @@ end
 # Given a third point, pick the best two that retain the bracket
 # around the minimum (as defined by HZ, eq. 29)
 # b will be the upper bound, and a the lower bound
-function update!(ϕdϕ,
-                 alphas,
-                 values,
-                 slopes,
-                 ia::Integer,
-                 ib::Integer,
-                 ic::Integer,
-                 phi_lim::Real,
-                 display::Integer = 0)
+function update!(
+    ϕdϕ,
+    alphas,
+    values,
+    slopes,
+    ia::Integer,
+    ib::Integer,
+    ic::Integer,
+    phi_lim::Real,
+    display::Integer = 0,
+)
     a = alphas[ia]
     b = alphas[ib]
     T = eltype(slopes)
@@ -455,13 +526,22 @@ function update!(ϕdϕ,
     phi_c = values[ic]
     dphi_c = slopes[ic]
     if display & UPDATE > 0
-        println("update: ia = ", ia,
-                ", a = ", a,
-                ", ib = ", ib,
-                ", b = ", b,
-                ", c = ", c,
-                ", phi_c = ", phi_c,
-                ", dphi_c = ", dphi_c)
+        println(
+            "update: ia = ",
+            ia,
+            ", a = ",
+            a,
+            ", ib = ",
+            ib,
+            ", b = ",
+            b,
+            ", c = ",
+            c,
+            ", phi_c = ",
+            phi_c,
+            ", dphi_c = ",
+            dphi_c,
+        )
     end
     if c < a || c > b
         return ia, ib #, 0, 0  # it's out of the bracketing interval
@@ -483,14 +563,16 @@ function update!(ϕdϕ,
 end
 
 # HZ, stage U3 (with theta=0.5)
-function bisect!(ϕdϕ,
-                 alphas::AbstractArray{T},
-                 values,
-                 slopes,
-                 ia::Integer,
-                 ib::Integer,
-                 phi_lim::Real,
-                 display::Integer = 0) where T
+function bisect!(
+    ϕdϕ,
+    alphas::AbstractArray{T},
+    values,
+    slopes,
+    ia::Integer,
+    ib::Integer,
+    phi_lim::Real,
+    display::Integer = 0,
+) where {T}
     gphi = convert(T, NaN)
     a = alphas[ia]
     b = alphas[ib]
diff --git a/test/issues.jl b/test/issues.jl
index d0d01f4..7d717fa 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -3,32 +3,32 @@ using LineSearches, LinearAlgebra, Test
 
 A = randn(100, 100)
 x0 = randn(100)
-b = A*x0
+b = A * x0
 
 # Objective function and gradient
-f(x) = .5*norm(A*x - b)^2
-g!(gvec, x) = (gvec .= A'*(A*x-b))
+f(x) = 0.5 * norm(A * x - b)^2
+g!(gvec, x) = (gvec .= A' * (A * x - b))
 fg!(gvec, x) = (g!(gvec, x); return f(x))
 
 # Init
-x = 1f1*randn(100)
+x = 1.0f1 * randn(100)
 gv = similar(x)
 
 # Line search
-α0 = 1f-3
+α0 = 1.0f-3
 ϕ0 = fg!(gv, x)
-s = -1*gv
+s = -1 * gv
 dϕ0 = dot(gv, s)
 println(ϕ0, ", ", dϕ0)
 
 # Univariate line search functions
-ϕ(α) = f(x .+ α.*s)
+ϕ(α) = f(x .+ α .* s)
 function dϕ(α)
-    g!(gv, x .+ α.*s)
+    g!(gv, x .+ α .* s)
     return dot(gv, s)
 end
 function ϕdϕ(α)
-    phi = fg!(gv, x .+ α.*s)
+    phi = fg!(gv, x .+ α .* s)
     dphi = dot(gv, s)
     return (phi, dphi)
 end
@@ -36,3 +36,144 @@ end
 res = (StrongWolfe())(ϕ, dϕ, ϕdϕ, α0, ϕ0, dϕ0)
 @test res[2] > 0
 @test res[2] == ϕ(res[1])
+
+struct LineSearchTestCase
+    alphas::Vector{Float64}
+    values::Vector{Float64}
+    slopes::Vector{Float64}
+end
+
+@testset "HZ convergence issues" begin
+    @testset "Flatness check issues" begin
+        function prepare_test_case(; alphas, values, slopes)
+            perm = sortperm(alphas)
+            alphas = alphas[perm]
+            push!(alphas, alphas[end] + 1)
+            values = values[perm]
+            push!(values, values[end])
+            slopes = slopes[perm]
+            push!(slopes, 0.0)
+            return LineSearchTestCase(alphas, values, slopes)
+        end
+
+        tc1 = prepare_test_case(;
+            alphas = [0.0, 1.0, 5.0, 3.541670844449739],
+            values = [
+                3003.592409634743,
+                2962.0378569864743,
+                2891.4462095232184,
+                3000.9760725116876,
+            ],
+            slopes = [
+                -22332.321416890798,
+                -20423.214551925797,
+                11718.185026267562,
+                -22286.821227217057,
+            ],
+        )
+
+        function tc_to_f(tc)
+            function f(x)
+                i = findfirst(u -> u > x, tc.alphas) - 1
+                xk = tc.alphas[i]
+                xkp1 = tc.alphas[i+1]
+                dx = xkp1 - xk
+                t = (x - xk) / dx
+                h00t = 2t^3 - 3t^2 + 1
+                h10t = t * (1 - t)^2
+                h01t = t^2 * (3 - 2t)
+                h11t = t^2 * (t - 1)
+                val =
+                    h00t * tc.values[i] +
+                    h10t * dx * tc.slopes[i] +
+                    h01t * tc.values[i+1] +
+                    h11t * dx * tc.slopes[i+1]
+
+                return val
+            end
+        end
+        function tc_to_fdf(tc)
+            function fdf(x)
+                i = findfirst(u -> u > x, tc.alphas) - 1
+                xk = tc.alphas[i]
+                xkp1 = tc.alphas[i+1]
+                dx = xkp1 - xk
+                t = (x - xk) / dx
+                h00t = 2t^3 - 3t^2 + 1
+                h10t = t * (1 - t)^2
+                h01t = t^2 * (3 - 2t)
+                h11t = t^2 * (t - 1)
+                val =
+                    h00t * tc.values[i] +
+                    h10t * dx * tc.slopes[i] +
+                    h01t * tc.values[i+1] +
+                    h11t * dx * tc.slopes[i+1]
+
+                h00tp = 6t^2 - 6t
+                h10tp = 3t^2 - 4t + 1
+                h01tp = -6t^2 + 6 * t
+                h11tp = 3t^2 - 2t
+                slope =
+                    (
+                        h00tp * tc.values[i] +
+                        h10tp * dx * tc.slopes[i] +
+                        h01tp * tc.values[i+1] +
+                        h11tp * dx * tc.slopes[i+1]
+                    ) / dx
+                println(x, " ", val, " ", slope)
+                return val, slope
+            end
+        end
+
+        function test_tc(tc, check_flatness)
+            cache = LineSearchCache{Float64}()
+            hz = HagerZhang(; cache, check_flatness)
+            f = tc_to_f(tc)
+            fdf = tc_to_fdf(tc)
+            hz(f, fdf, 1.0, fdf(0.0)...), cache
+        end
+
+        res, res_cache = test_tc(tc1, true)
+        @test_broken minimum(res_cache.values) == res[2]
+
+        res2, res_cache2 = test_tc(tc1, false)
+        @test minimum(res_cache2.values) == res2[2]
+        #=
+        using AlgebraOfGraphics, CairoMakie
+        draw(data((x=0.0:0.05:5.5, y=map(x->tc_to_f(tc1)(x), 0:0.05:5.5)))*mapping(:x,:y)*visual(Scatter)+
+        data((alphas=res_cache.alphas, values=res_cache.values))*mapping(:alphas,:values)*visual(Scatter; color=:red))
+        =#
+    end
+
+    # should add as upstream
+    #=
+    @testset "from kbarros" begin
+        # The minimizer is x0=[0, 2πn/100], with f(x0) = 1. Any integer n is fine.
+        function f(x)
+            return (x[1]^2 + 1) * (2 - cos(100*x[2]))
+        end
+
+        using Optim
+
+        function test_converges(method)
+            for i in 1:100
+                r = randn(2)
+                res = optimize(f, r, method)
+                if Optim.converged(res) && minimum(res) > f([0,0]) + 1e-8
+                    println("""
+                        Incorrectly reported convergence after $(res.iterations) iterations
+                        Reached x = $(Optim.minimizer(res)) with f(x) = $(minimum(res))
+                        """)
+                end
+            end
+        end
+
+        # Works successfully, no printed output
+        test_converges(LBFGS(; linesearch=Optim.LineSearches.BackTracking(order=2)))
+
+        # Prints ~10 failures to converge (in 100 tries). Frequently fails after the
+        # first line search.
+        test_converges(ConjugateGradient(; linesearch=Optim.LineSearches.HagerZhang(check_flatness=false)))
+    end
+    =#
+end

From a1859b203d3b89f334e9333f59a3490824cb1767 Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen <patrick.mogensen@gmail.com>
Date: Wed, 26 Mar 2025 07:37:57 +0100
Subject: [PATCH 2/5] undo format

---
 src/hagerzhang.jl | 306 +++++++++++++++++-----------------------------
 1 file changed, 113 insertions(+), 193 deletions(-)

diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
index e8688fa..61a0b4d 100644
--- a/src/hagerzhang.jl
+++ b/src/hagerzhang.jl
@@ -44,19 +44,19 @@
 # Display flags are represented as a bitfield
 # (not exported, but can use via LineSearches.ITER, for example)
 const one64 = convert(UInt64, 1)
-const FINAL = one64
-const ITER = one64 << 1
-const PARAMETERS = one64 << 2
-const GRADIENT = one64 << 3
-const SEARCHDIR = one64 << 4
-const ALPHA = one64 << 5
-const BETA = one64 << 6
+const FINAL       = one64
+const ITER        = one64 << 1
+const PARAMETERS  = one64 << 2
+const GRADIENT    = one64 << 3
+const SEARCHDIR   = one64 << 4
+const ALPHA       = one64 << 5
+const BETA        = one64 << 6
 # const ALPHAGUESS  = one64 << 7 TODO: not needed
-const BRACKET = one64 << 8
-const LINESEARCH = one64 << 9
-const UPDATE = one64 << 10
-const SECANT2 = one64 << 11
-const BISECT = one64 << 12
+const BRACKET     = one64 << 8
+const LINESEARCH  = one64 << 9
+const UPDATE      = one64 << 10
+const SECANT2     = one64 << 11
+const BISECT      = one64 << 12
 const BARRIERCOEF = one64 << 13
 display_nextbit = 14
 
@@ -80,32 +80,25 @@ Conjugate gradient line search implementation from:
     conjugate gradient method with guaranteed descent. ACM
     Transactions on Mathematical Software 32: 113–137.
 """
-@with_kw struct HagerZhang{T,Tm} <: AbstractLineSearch
-    delta::T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
-    sigma::T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
-    alphamax::T = Inf
-    rho::T = 5.0
-    epsilon::T = 1e-6
-    gamma::T = 0.66
-    linesearchmax::Int = 50
-    psi3::T = 0.1
-    display::Int = 0
-    mayterminate::Tm = Ref{Bool}(false)
-    cache::Union{Nothing,LineSearchCache{T}} = nothing
-    check_flatness::Bool = false
+@with_kw struct HagerZhang{T, Tm} <: AbstractLineSearch
+   delta::T = DEFAULTDELTA # c_1 Wolfe sufficient decrease condition
+   sigma::T = DEFAULTSIGMA # c_2 Wolfe curvature condition (Recommend 0.1 for GradientDescent)
+   alphamax::T = Inf
+   rho::T = 5.0
+   epsilon::T = 1e-6
+   gamma::T = 0.66
+   linesearchmax::Int = 50
+   psi3::T = 0.1
+   display::Int = 0
+   mayterminate::Tm = Ref{Bool}(false)
+   cache::Union{Nothing,LineSearchCache{T}} = nothing
+   check_flatness::Bool = false
 end
-HagerZhang{T}(args...; kwargs...) where {T} =
-    HagerZhang{T,Base.RefValue{Bool}}(args...; kwargs...)
-
-function (ls::HagerZhang)(
-    df::AbstractObjective,
-    x::AbstractArray{T},
-    s::AbstractArray{T},
-    α::Real,
-    x_new::AbstractArray{T},
-    phi_0::Real,
-    dphi_0::Real,
-) where {T}
+HagerZhang{T}(args...; kwargs...) where T = HagerZhang{T, Base.RefValue{Bool}}(args...; kwargs...)
+
+function (ls::HagerZhang)(df::AbstractObjective, x::AbstractArray{T},
+                          s::AbstractArray{T}, α::Real,
+                          x_new::AbstractArray{T}, phi_0::Real, dphi_0::Real) where T
     ϕ, ϕdϕ = make_ϕ_ϕdϕ(df, x_new, x, s)
     ls(ϕ, ϕdϕ, α::Real, phi_0, dphi_0)
 end
@@ -113,26 +106,18 @@ end
 (ls::HagerZhang)(ϕ, dϕ, ϕdϕ, c, phi_0, dphi_0) = ls(ϕ, ϕdϕ, c, phi_0, dphi_0)
 
 # TODO: Should we deprecate the interface that only uses the ϕ and ϕd\phi arguments?
-function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T} # Should c and phi_0 be same type?
-    @unpack delta,
-    sigma,
-    alphamax,
-    rho,
-    epsilon,
-    gamma,
-    linesearchmax,
-    psi3,
-    display,
-    mayterminate,
-    cache = ls
+function (ls::HagerZhang)(ϕ, ϕdϕ,
+                          c::T,
+                          phi_0::Real,
+                          dphi_0::Real) where T # Should c and phi_0 be same type?
+    @unpack delta, sigma, alphamax, rho, epsilon, gamma,
+            linesearchmax, psi3, display, mayterminate, cache = ls
     emptycache!(cache)
 
     zeroT = convert(T, 0)
     pushcache!(cache, zeroT, phi_0, dphi_0)
     if !(isfinite(phi_0) && isfinite(dphi_0))
-        throw(
-            LineSearchException("Value and slope at step length = 0 must be finite.", T(0)),
-        )
+        throw(LineSearchException("Value and slope at step length = 0 must be finite.", T(0)))
     end
     if dphi_0 >= eps(T) * abs(phi_0)
         throw(LineSearchException("Search direction is not a direction of descent.", T(0)))
@@ -179,7 +164,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
     # If c was generated by quadratic interpolation, check whether it
     # satisfies the Wolfe conditions
     if mayterminate[] &&
-       satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
+          satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
         if display & LINESEARCH > 0
             println("Wolfe condition satisfied on point alpha = ", c)
         end
@@ -195,24 +180,17 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
     cold = -one(T)
     while !isbracketed && iter < linesearchmax
         if display & BRACKET > 0
-            println(
-                "bracketing: ia = ",
-                ia,
-                ", ib = ",
-                ib,
-                ", c = ",
-                c,
-                ", phi_c = ",
-                phi_c,
-                ", dphi_c = ",
-                dphi_c,
-            )
+            println("bracketing: ia = ", ia,
+                    ", ib = ", ib,
+                    ", c = ", c,
+                    ", phi_c = ", phi_c,
+                    ", dphi_c = ", dphi_c)
         end
         if dphi_c >= zeroT
             # We've reached the upward slope, so we have b; examine
             # previous values to find a
             ib = length(alphas)
-            for i = (ib-1):-1:1
+            for i = (ib - 1):-1:1
                 if values[i] <= phi_lim
                     ia = i
                     break
@@ -224,7 +202,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
             # have crested over the peak. Use bisection.
             ib = length(alphas)
             ia = 1
-            if c ≉ alphas[ib] || slopes[ib] >= zeroT
+            if c ≉  alphas[ib] || slopes[ib] >= zeroT
                 error("c = ", c)
             end
             # ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
@@ -249,19 +227,13 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
             if c > alphamax
                 c = alphamax
                 if display & BRACKET > 0
-                    println(
-                        "bracket: exceeding alphamax, using c = alphamax = ",
-                        alphamax,
-                        ", cold = ",
-                        cold,
-                    )
+                    println("bracket: exceeding alphamax, using c = alphamax = ", alphamax,
+                    ", cold = ", cold)
                 end
             end
             phi_c, dphi_c = ϕdϕ(c)
             iterfinite = 1
-            while !(isfinite(phi_c) && isfinite(dphi_c)) &&
-                      c > nextfloat(cold) &&
-                      iterfinite < iterfinitemax
+            while !(isfinite(phi_c) && isfinite(dphi_c)) && c > nextfloat(cold) && iterfinite < iterfinitemax
                 alphamax = c # shrinks alphamax, assumes that steps >= c can never have finite phi_c and dphi_c
                 iterfinite += 1
                 if display & BRACKET > 0
@@ -272,19 +244,11 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
             end
             if !(isfinite(phi_c) && isfinite(dphi_c))
                 if display & BRACKET > 0
-                    println(
-                        "Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.",
-                    )
-                    println(
-                        "c = ",
-                        c,
-                        ", alphamax = ",
-                        alphamax,
-                        ", phi_c = ",
-                        phi_c,
-                        ", dphi_c = ",
-                        dphi_c,
-                    )
+                    println("Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.")
+                    println("c = ", c,
+                            ", alphamax = ", alphamax,
+                            ", phi_c = ", phi_c,
+                            ", dphi_c = ", dphi_c)
                 end
                 return cold, phi_cold
             end
@@ -299,27 +263,18 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
         b = alphas[ib]
         @assert b > a
         if display & LINESEARCH > 0
-            println(
-                "linesearch: ia = ",
-                ia,
-                ", ib = ",
-                ib,
-                ", a = ",
-                a,
-                ", b = ",
-                b,
-                ", phi(a) = ",
-                values[ia],
-                ", phi(b) = ",
-                values[ib],
-            )
+            println("linesearch: ia = ", ia,
+                    ", ib = ", ib,
+                    ", a = ", a,
+                    ", b = ", b,
+                    ", phi(a) = ", values[ia],
+                    ", phi(b) = ", values[ib])
         end
         if b - a <= eps(b)
             mayterminate[] = false # reset in case another initial guess is used next
             return a, values[ia] # lsr.value[ia]
         end
-        iswolfe, iA, iB =
-            secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
+        iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
         if iswolfe
             mayterminate[] = false # reset in case another initial guess is used next
             return alphas[iA], values[iA] # lsr.value[iA]
@@ -331,9 +286,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
             if display & LINESEARCH > 0
                 println("Linesearch: secant succeeded")
             end
-            if ls.check_flatness &&
-               nextfloat(values[ia]) >= values[ib] &&
-               nextfloat(values[iA]) >= values[iB]
+            if ls.check_flatness && nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
                 # It's so flat, secant didn't do anything useful, time to quit
                 if display & LINESEARCH > 0
                     println("Linesearch: secant suggests it's flat")
@@ -357,44 +310,30 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, c::T, phi_0::Real, dphi_0::Real) where {T}
             push!(values, phi_c)
             push!(slopes, dphi_c)
 
-            ia, ib = update!(
-                ϕdϕ,
-                alphas,
-                values,
-                slopes,
-                iA,
-                iB,
-                length(alphas),
-                phi_lim,
-                display,
-            )
+            ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
         end
         iter += 1
     end
 
-    throw(
-        LineSearchException(
-            "Linesearch failed to converge, reached maximum iterations $(linesearchmax).",
-            alphas[ia],
-        ),
-    )
+    throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(linesearchmax).",
+                              alphas[ia]))
 
 
 end
 
 # Check Wolfe & approximate Wolfe
-function satisfies_wolfe(
-    c::T,
-    phi_c::Real,
-    dphi_c::Real,
-    phi_0::Real,
-    dphi_0::Real,
-    phi_lim::Real,
-    delta::Real,
-    sigma::Real,
-) where {T<:Number}
-    wolfe1 = delta * dphi_0 >= (phi_c - phi_0) / c && dphi_c >= sigma * dphi_0
-    wolfe2 = (2 * delta - 1) * dphi_0 >= dphi_c >= sigma * dphi_0 && phi_c <= phi_lim
+function satisfies_wolfe(c::T,
+                         phi_c::Real,
+                         dphi_c::Real,
+                         phi_0::Real,
+                         dphi_0::Real,
+                         phi_lim::Real,
+                         delta::Real,
+                         sigma::Real) where T<:Number
+    wolfe1 = delta * dphi_0 >= (phi_c - phi_0) / c &&
+               dphi_c >= sigma * dphi_0
+    wolfe2 = (2 * delta - 1) * dphi_0 >= dphi_c >= sigma * dphi_0 &&
+               phi_c <= phi_lim
     return wolfe1 || wolfe2
 end
 
@@ -406,18 +345,16 @@ function secant(alphas, values, slopes, ia::Integer, ib::Integer)
     return secant(alphas[ia], alphas[ib], slopes[ia], slopes[ib])
 end
 # phi
-function secant2!(
-    ϕdϕ,
-    alphas,
-    values,
-    slopes,
-    ia::Integer,
-    ib::Integer,
-    phi_lim::Real,
-    delta::Real = DEFAULTDELTA,
-    sigma::Real = DEFAULTSIGMA,
-    display::Integer = 0,
-)
+function secant2!(ϕdϕ,
+                  alphas,
+                  values,
+                  slopes,
+                  ia::Integer,
+                  ib::Integer,
+                  phi_lim::Real,
+                  delta::Real = DEFAULTDELTA,
+                  sigma::Real = DEFAULTSIGMA,
+                  display::Integer = 0)
     phi_0 = values[1]
     dphi_0 = slopes[1]
     a = alphas[ia]
@@ -427,13 +364,9 @@ function secant2!(
     T = eltype(slopes)
     zeroT = convert(T, 0)
     if !(dphi_a < zeroT && dphi_b >= zeroT)
-        error(
-            string(
-                "Search direction is not a direction of descent; ",
-                "this error may indicate that user-provided derivatives are inaccurate. ",
-                @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b
-            ),
-        )
+        error(string("Search direction is not a direction of descent; ",
+                     "this error may indicate that user-provided derivatives are inaccurate. ",
+                      @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b))
     end
     c = secant(a, b, dphi_a, dphi_b)
     if display & SECANT2 > 0
@@ -502,17 +435,15 @@ end
 # Given a third point, pick the best two that retain the bracket
 # around the minimum (as defined by HZ, eq. 29)
 # b will be the upper bound, and a the lower bound
-function update!(
-    ϕdϕ,
-    alphas,
-    values,
-    slopes,
-    ia::Integer,
-    ib::Integer,
-    ic::Integer,
-    phi_lim::Real,
-    display::Integer = 0,
-)
+function update!(ϕdϕ,
+                 alphas,
+                 values,
+                 slopes,
+                 ia::Integer,
+                 ib::Integer,
+                 ic::Integer,
+                 phi_lim::Real,
+                 display::Integer = 0)
     a = alphas[ia]
     b = alphas[ib]
     T = eltype(slopes)
@@ -526,22 +457,13 @@ function update!(
     phi_c = values[ic]
     dphi_c = slopes[ic]
     if display & UPDATE > 0
-        println(
-            "update: ia = ",
-            ia,
-            ", a = ",
-            a,
-            ", ib = ",
-            ib,
-            ", b = ",
-            b,
-            ", c = ",
-            c,
-            ", phi_c = ",
-            phi_c,
-            ", dphi_c = ",
-            dphi_c,
-        )
+        println("update: ia = ", ia,
+                ", a = ", a,
+                ", ib = ", ib,
+                ", b = ", b,
+                ", c = ", c,
+                ", phi_c = ", phi_c,
+                ", dphi_c = ", dphi_c)
     end
     if c < a || c > b
         return ia, ib #, 0, 0  # it's out of the bracketing interval
@@ -563,16 +485,14 @@ function update!(
 end
 
 # HZ, stage U3 (with theta=0.5)
-function bisect!(
-    ϕdϕ,
-    alphas::AbstractArray{T},
-    values,
-    slopes,
-    ia::Integer,
-    ib::Integer,
-    phi_lim::Real,
-    display::Integer = 0,
-) where {T}
+function bisect!(ϕdϕ,
+                 alphas::AbstractArray{T},
+                 values,
+                 slopes,
+                 ia::Integer,
+                 ib::Integer,
+                 phi_lim::Real,
+                 display::Integer = 0) where T
     gphi = convert(T, NaN)
     a = alphas[ia]
     b = alphas[ib]

From aa8fe5bd16a4b28b1e05ff99ca143db094a9a6b0 Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen <patrick.mogensen@gmail.com>
Date: Wed, 26 Mar 2025 07:39:05 +0100
Subject: [PATCH 3/5] Update issues.jl

---
 test/issues.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/issues.jl b/test/issues.jl
index 7d717fa..188d5e4 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -134,6 +134,8 @@ end
         end
 
         res, res_cache = test_tc(tc1, true)
+        @warn res
+        @warn res_cache
         @test_broken minimum(res_cache.values) == res[2]
 
         res2, res_cache2 = test_tc(tc1, false)

From f862bb8516a6b15ce394166907695843db496118 Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen <patrick.mogensen@gmail.com>
Date: Wed, 26 Mar 2025 07:44:26 +0100
Subject: [PATCH 4/5] Update issues.jl

---
 test/issues.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/issues.jl b/test/issues.jl
index 188d5e4..65ac142 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -134,8 +134,8 @@ end
         end
 
         res, res_cache = test_tc(tc1, true)
-        @warn res
-        @warn res_cache
+        @show res
+        @show res_cache
         @test_broken minimum(res_cache.values) == res[2]
 
         res2, res_cache2 = test_tc(tc1, false)

From 8b33bc2d441394787335aca6faa799b4f49543ff Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen <patrick.mogensen@gmail.com>
Date: Wed, 26 Mar 2025 07:54:53 +0100
Subject: [PATCH 5/5] Fix tests

---
 test/captured.jl | 2 +-
 test/issues.jl   | 6 ------
 test/runtests.jl | 3 ++-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/test/captured.jl b/test/captured.jl
index 05b81fa..d6d97f7 100644
--- a/test/captured.jl
+++ b/test/captured.jl
@@ -31,5 +31,5 @@ end
     fdf = OnceDifferentiable(tc)
     hz = HagerZhang()
     α, val = hz(fdf.f, fdf.fdf, 1.0, fdf.fdf(0.0)...)
-    @test_broken val <= minimum(tc)
+    @test val <= minimum(tc)
 end
diff --git a/test/issues.jl b/test/issues.jl
index 65ac142..687907e 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -37,12 +37,6 @@ res = (StrongWolfe())(ϕ, dϕ, ϕdϕ, α0, ϕ0, dϕ0)
 @test res[2] > 0
 @test res[2] == ϕ(res[1])
 
-struct LineSearchTestCase
-    alphas::Vector{Float64}
-    values::Vector{Float64}
-    slopes::Vector{Float64}
-end
-
 @testset "HZ convergence issues" begin
     @testset "Flatness check issues" begin
         function prepare_test_case(; alphas, values, slopes)
diff --git a/test/runtests.jl b/test/runtests.jl
index 4910d53..011bc22 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,8 @@ my_tests = [
     "alphacalc.jl",
     "arbitrary_precision.jl",
     "examples.jl",
-    "captured.jl"
+    "captured.jl",
+    "issues.jl",
 ]
 
 mutable struct StateDummy