JuliaNLSolvers · theplatters · Oct 17, 2024 · Oct 18, 2024 · Oct 18, 2024
diff --git a/src/backtracking.jl b/src/backtracking.jl
@@ -9,116 +9,120 @@ there exists a factor ρ = ρ(c₁) such that α' ≦ ρ α.
 This is a modification of the algorithm described in Nocedal Wright (2nd ed), Sec. 3.5.
 """
 @with_kw struct BackTracking{TF, TI} <: AbstractLineSearch
-    c_1::TF = 1e-4
-    ρ_hi::TF = 0.5
-    ρ_lo::TF = 0.1
-    iterations::TI = 1_000
-    order::TI = 3
-    maxstep::TF = Inf
-    cache::Union{Nothing,LineSearchCache{TF}} = nothing
+	c_1::TF = 1e-4
+	ρ_hi::TF = 0.5
+	ρ_lo::TF = 0.1
+	iterations::TI = 1_000
+	order::TI = 3
+	maxstep::TF = Inf
+	cache::Union{Nothing, LineSearchCache{TF}} = nothing
 end
-BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF,Int}(args...; kwargs...)
+BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF, Int}(args...; kwargs...)
 
 function (ls::BackTracking)(df::AbstractObjective, x::AbstractArray{T}, s::AbstractArray{T},
-                            α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = convert(real(T), Inf)) where {T, Tα}
-    ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)
-
-    if ϕ_0 == nothing
-        ϕ_0 = ϕ(Tα(0))
-    end
-    if dϕ_0 == nothing
-        dϕ_0 = dϕ(Tα(0))
-    end
-
-    α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
-    ls(ϕ, α_0, ϕ_0, dϕ_0)
+	α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = typemax(real(T))) where {T, Tα}
+	ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)
+
+	if isnothing(ϕ_0)
+		ϕ_0 = ϕ(Tα(0))
+	end
+	if isnothing(dϕ_0) 
+		dϕ_0 = dϕ(Tα(0))
+	end
+
+	α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
+	ls(ϕ, α_0, ϕ_0, dϕ_0)
 end
 
 (ls::BackTracking)(ϕ, dϕ, ϕdϕ, αinitial, ϕ_0, dϕ_0) = ls(ϕ, αinitial, ϕ_0, dϕ_0)
 
 # TODO: Should we deprecate the interface that only uses the ϕ argument?
 function (ls::BackTracking)(ϕ, αinitial::Tα, ϕ_0, dϕ_0) where Tα
-    @unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
-    emptycache!(cache)
-    pushcache!(cache, 0, ϕ_0, dϕ_0)  # backtracking doesn't use the slope except here
-
-    iterfinitemax = -log2(eps(real(Tα)))
-
-    @assert order in (2,3)
-    # Check the input is valid, and modify otherwise
-    #backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
-    #if c_1 >= backtrack_condition
-    #    warn("""The Armijo constant c_1 is too large; replacing it with
-    #                   $(backtrack_condition)""")
-    #   c_1 = backtrack_condition
-    #end
-
-    # Count the total number of iterations
-    iteration = 0
-
-    ϕx_0, ϕx_1 = ϕ_0, ϕ_0
-
-    α_1, α_2 = αinitial, αinitial
-
-    ϕx_1 = ϕ(α_1)
-
-    # Hard-coded backtrack until we find a finite function value
-    iterfinite = 0
-    while !isfinite(ϕx_1) && iterfinite < iterfinitemax
-        iterfinite += 1
-        α_1 = α_2
-        α_2 = α_1/2
-
-        ϕx_1 = ϕ(α_2)
-    end
-    pushcache!(cache, αinitial, ϕx_1)
-    # TODO: check if value is finite (maybe iterfinite > iterfinitemax)
-
-    # Backtrack until we satisfy sufficient decrease condition
-    while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
-        # Increment the number of steps we've had to perform
-        iteration += 1
-
-        # Ensure termination
-        if iteration > iterations
-            throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
-                                      α_2))
-        end
-
-        # Shrink proposed step-size:
-        if order == 2 || iteration == 1
-            # backtracking via quadratic interpolation:
-            # This interpolates the available data
-            #    f(0), f'(0), f(α)
-            # with a quadractic which is then minimised; this comes with a
-            # guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
-            # provided that c_1 < 1/2; the backtrack_condition at the beginning
-            # of the function guarantees at least a backtracking factor ρ.
-            α_tmp = - (dϕ_0 * α_2^2) / ( 2 * (ϕx_1 - ϕ_0 - dϕ_0*α_2) )
-        else
-            div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
-            a = (α_1^2*(ϕx_1 - ϕ_0 - dϕ_0*α_2) - α_2^2*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div
-            b = (-α_1^3*(ϕx_1 - ϕ_0 - dϕ_0*α_2) + α_2^3*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div
-
-            if isapprox(a, zero(a), atol=eps(real(Tα)))
-                α_tmp = dϕ_0 / (2*b)
-            else
-                # discriminant
-                d = max(b^2 - 3*a*dϕ_0, Tα(0))
-                # quadratic equation root
-                α_tmp = (-b + sqrt(d)) / (3*a)
-            end
-        end
-
-        α_1 = α_2
-
-        α_tmp = NaNMath.min(α_tmp, α_2*ρ_hi) # avoid too small reductions
-        α_2 = NaNMath.max(α_tmp, α_2*ρ_lo) # avoid too big reductions
-
-        # Evaluate f(x) at proposed position
-        ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
-        pushcache!(cache, α_2, ϕx_1)
-    end
-
-    return α_2, ϕx_1
+	@unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
+	emptycache!(cache)
+	pushcache!(cache, 0, ϕ_0, dϕ_0)  # backtracking doesn't use the slope except here
+
+	iterfinitemax = -log2(eps(real(Tα)))
+
+	@assert order in (2, 3)
+	# Check the input is valid, and modify otherwise
+	#backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
+	#if c_1 >= backtrack_condition
+	#    warn("""The Armijo constant c_1 is too large; replacing it with
+	#                   $(backtrack_condition)""")
+	#   c_1 = backtrack_condition
+	#end
+
+	# Count the total number of iterations
+	iteration = 0
+
+	ϕx_0, ϕx_1 = ϕ_0, ϕ_0
+
+	α_1, α_2 = αinitial, αinitial
+
+	ϕx_1 = ϕ(α_1)
+
+	# Hard-coded backtrack until we find a finite function value
+	iterfinite = 0
+	while !isfinite(ϕx_1) && iterfinite < iterfinitemax
+		iterfinite += 1
+		α_1 = α_2
+		α_2 = α_1 / 2
+
+		ϕx_1 = ϕ(α_2)
+	end
+	pushcache!(cache, αinitial, ϕx_1)
+
+
+	if !isfinite(ϕx_1)
+			error("Could not find a α, where the function is finite")
+	end
+
+	# Backtrack until we satisfy sufficient decrease condition
+	while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
+		# Increment the number of steps we've had to perform
+		iteration += 1
+
+		# Ensure termination
+		if iteration > iterations
+			throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
+				α_2))
+		end
+
+		# Shrink proposed step-size:
+		if order == 2 || iteration == 1
+			# backtracking via quadratic interpolation:
+			# This interpolates the available data
+			#    f(0), f'(0), f(α)
+			# with a quadractic which is then minimised; this comes with a
+			# guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
+			# provided that c_1 < 1/2; the backtrack_condition at the beginning
+			# of the function guarantees at least a backtracking factor ρ.
+			α_tmp = -(dϕ_0 * α_2^2) / (2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2))
+		else
+			div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
+			a = (α_1^2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) - α_2^2 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div
+			b = (-α_1^3 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) + α_2^3 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div
+
+			if isapprox(a, zero(a), atol = eps(real(Tα)))
+				α_tmp = dϕ_0 / (2 * b)
+			else
+				# discriminant
+				d = max(b^2 - 3 * a * dϕ_0, Tα(0))
+				# quadratic equation root
+				α_tmp = (-b + sqrt(d)) / (3 * a)
+			end
+		end
+
+		α_1 = α_2
+
+		α_tmp = NaNMath.min(α_tmp, α_2 * ρ_hi) # avoid too small reductions
+		α_2 = NaNMath.max(α_tmp, α_2 * ρ_lo) # avoid too big reductions
+
+		# Evaluate f(x) at proposed position
+		ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
+		pushcache!(cache, α_2, ϕx_1)
+	end
+
+	return α_2, ϕx_1
 end
diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
@@ -113,15 +113,14 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             linesearchmax, psi3, display, mayterminate, cache = ls
     emptycache!(cache)
 
-    zeroT = convert(T, 0)
-    pushcache!(cache, zeroT, phi_0, dphi_0)
+    pushcache!(cache, zero(T), phi_0, dphi_0)
     if !(isfinite(phi_0) && isfinite(dphi_0))
         throw(LineSearchException("Value and slope at step length = 0 must be finite.", T(0)))
     end
     if dphi_0 >= eps(T) * abs(phi_0)
         throw(LineSearchException("Search direction is not a direction of descent.", T(0)))
     elseif dphi_0 >= 0
-        return zeroT, phi_0
+        return zero(T), phi_0
     end
 
     # Prevent values of x_new = x+αs that are likely to make
@@ -130,7 +129,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     if cache !== nothing
         @unpack alphas, values, slopes = cache
     else
-        alphas = [zeroT] # for bisection
+        alphas = [zero(T)] # for bisection
         values = [phi_0]
         slopes = [dphi_0]
     end
@@ -141,7 +140,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
 
     phi_lim = phi_0 + epsilon * abs(phi_0)
     @assert c >= 0
-    c <= eps(T) && return zeroT, phi_0
+    c <= eps(T) && return zero(T), phi_0
     @assert isfinite(c) && c <= alphamax
     phi_c, dphi_c = ϕdϕ(c)
     iterfinite = 1
@@ -154,7 +153,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     if !(isfinite(phi_c) && isfinite(dphi_c))
         @warn("Failed to achieve finite new evaluation point, using alpha=0")
         mayterminate[] = false # reset in case another initial guess is used next
-        return zeroT, phi_0
+        return zero(T), phi_0
     end
     push!(alphas, c)
     push!(values, phi_c)
@@ -185,7 +184,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
                     ", phi_c = ", phi_c,
                     ", dphi_c = ", dphi_c)
         end
-        if dphi_c >= zeroT
+        if dphi_c >= zero(T)
             # We've reached the upward slope, so we have b; examine
             # previous values to find a
             ib = length(alphas)
@@ -201,7 +200,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             # have crested over the peak. Use bisection.
             ib = length(alphas)
             ia = 1
-            if c ≉  alphas[ib] || slopes[ib] >= zeroT
+            if c ≉  alphas[ib] || slopes[ib] >= zero(T)
                 error("c = ", c)
             end
             # ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
@@ -360,8 +359,7 @@ function secant2!(ϕdϕ,
     dphi_a = slopes[ia]
     dphi_b = slopes[ib]
     T = eltype(slopes)
-    zeroT = convert(T, 0)
-    if !(dphi_a < zeroT && dphi_b >= zeroT)
+    if !(dphi_a < zero(T) && dphi_b >= zero(T))
         error(string("Search direction is not a direction of descent; ",
                      "this error may indicate that user-provided derivatives are inaccurate. ",
                       @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b))
@@ -445,11 +443,10 @@ function update!(ϕdϕ,
     a = alphas[ia]
     b = alphas[ib]
     T = eltype(slopes)
-    zeroT = convert(T, 0)
     # Debugging (HZ, eq. 4.4):
-    @assert slopes[ia] < zeroT
+    @assert slopes[ia] < zero(T)
     @assert values[ia] <= phi_lim
-    @assert slopes[ib] >= zeroT
+    @assert slopes[ib] >= zero(T)
     @assert b > a
     c = alphas[ic]
     phi_c = values[ic]
@@ -466,7 +463,7 @@ function update!(ϕdϕ,
     if c < a || c > b
         return ia, ib #, 0, 0  # it's out of the bracketing interval
     end
-    if dphi_c >= zeroT
+    if dphi_c >= zero(T)
         return ia, ic #, 0, 0  # replace b with a closer point
     end
     # We know dphi_c < 0. However, phi may not be monotonic between a
@@ -495,10 +492,9 @@ function bisect!(ϕdϕ,
     a = alphas[ia]
     b = alphas[ib]
     # Debugging (HZ, conditions shown following U3)
-    zeroT = convert(T, 0)
-    @assert slopes[ia] < zeroT
+    @assert slopes[ia] < zero(T)
     @assert values[ia] <= phi_lim
-    @assert slopes[ib] < zeroT       # otherwise we wouldn't be here
+    @assert slopes[ib] < zero(T)       # otherwise we wouldn't be here
     @assert values[ib] > phi_lim
     @assert b > a
     while b - a > eps(b)
@@ -514,7 +510,7 @@ function bisect!(ϕdϕ,
         push!(slopes, gphi)
 
         id = length(alphas)
-        if gphi >= zeroT
+        if gphi >= zero(T)
             return ia, id # replace b, return
         end
         if phi_d <= phi_lim

diff --git a/src/initialguess.jl b/src/initialguess.jl
@@ -274,14 +274,13 @@ function _hzI0(x::AbstractArray{Tx},
                f_x::T,
                alphamax::T,
                psi0::T = convert(T, 1)/100) where {Tx,T}
-    zeroT = convert(T, 0)
-    alpha = convert(T, 1)
+    alpha = one(T) 
     gr_max = maximum(abs, gr)
-    if gr_max != zeroT
+    if gr_max != zero(T)
         x_max = maximum(abs, x)
-        if x_max != zeroT
+        if x_max != zero(T)
             alpha = psi0 * x_max / gr_max
-        elseif f_x != zeroT
+        elseif f_x != zero(T)
             alpha = psi0 * abs(f_x) / norm(gr)^2
         end
     end