Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 107 additions & 103 deletions src/backtracking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,116 +9,120 @@ there exists a factor ρ = ρ(c₁) such that α' ≦ ρ α.
This is a modification of the algorithm described in Nocedal Wright (2nd ed), Sec. 3.5.
"""
@with_kw struct BackTracking{TF, TI} <: AbstractLineSearch
c_1::TF = 1e-4
ρ_hi::TF = 0.5
ρ_lo::TF = 0.1
iterations::TI = 1_000
order::TI = 3
maxstep::TF = Inf
cache::Union{Nothing,LineSearchCache{TF}} = nothing
c_1::TF = 1e-4
ρ_hi::TF = 0.5
ρ_lo::TF = 0.1
iterations::TI = 1_000
order::TI = 3
maxstep::TF = Inf
cache::Union{Nothing, LineSearchCache{TF}} = nothing
end
BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF,Int}(args...; kwargs...)
BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF, Int}(args...; kwargs...)

function (ls::BackTracking)(df::AbstractObjective, x::AbstractArray{T}, s::AbstractArray{T},
α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = convert(real(T), Inf)) where {T, Tα}
ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)

if ϕ_0 == nothing
ϕ_0 = ϕ(Tα(0))
end
if dϕ_0 == nothing
dϕ_0 = dϕ(Tα(0))
end

α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
ls(ϕ, α_0, ϕ_0, dϕ_0)
α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = typemax(real(T))) where {T, Tα}
ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)

if isnothing(ϕ_0)
ϕ_0 = ϕ(Tα(0))
end
if isnothing(dϕ_0)
dϕ_0 = dϕ(Tα(0))
end

α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
ls(ϕ, α_0, ϕ_0, dϕ_0)
end

(ls::BackTracking)(ϕ, dϕ, ϕdϕ, αinitial, ϕ_0, dϕ_0) = ls(ϕ, αinitial, ϕ_0, dϕ_0)

# TODO: Should we deprecate the interface that only uses the ϕ argument?
function (ls::BackTracking)(ϕ, αinitial::Tα, ϕ_0, dϕ_0) where Tα
@unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
emptycache!(cache)
pushcache!(cache, 0, ϕ_0, dϕ_0) # backtracking doesn't use the slope except here

iterfinitemax = -log2(eps(real(Tα)))

@assert order in (2,3)
# Check the input is valid, and modify otherwise
#backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
#if c_1 >= backtrack_condition
# warn("""The Armijo constant c_1 is too large; replacing it with
# $(backtrack_condition)""")
# c_1 = backtrack_condition
#end

# Count the total number of iterations
iteration = 0

ϕx_0, ϕx_1 = ϕ_0, ϕ_0

α_1, α_2 = αinitial, αinitial

ϕx_1 = ϕ(α_1)

# Hard-coded backtrack until we find a finite function value
iterfinite = 0
while !isfinite(ϕx_1) && iterfinite < iterfinitemax
iterfinite += 1
α_1 = α_2
α_2 = α_1/2

ϕx_1 = ϕ(α_2)
end
pushcache!(cache, αinitial, ϕx_1)
# TODO: check if value is finite (maybe iterfinite > iterfinitemax)

# Backtrack until we satisfy sufficient decrease condition
while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
# Increment the number of steps we've had to perform
iteration += 1

# Ensure termination
if iteration > iterations
throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
α_2))
end

# Shrink proposed step-size:
if order == 2 || iteration == 1
# backtracking via quadratic interpolation:
# This interpolates the available data
# f(0), f'(0), f(α)
# with a quadractic which is then minimised; this comes with a
# guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
# provided that c_1 < 1/2; the backtrack_condition at the beginning
# of the function guarantees at least a backtracking factor ρ.
α_tmp = - (dϕ_0 * α_2^2) / ( 2 * (ϕx_1 - ϕ_0 - dϕ_0*α_2) )
else
div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
a = (α_1^2*(ϕx_1 - ϕ_0 - dϕ_0*α_2) - α_2^2*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div
b = (-α_1^3*(ϕx_1 - ϕ_0 - dϕ_0*α_2) + α_2^3*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div

if isapprox(a, zero(a), atol=eps(real(Tα)))
α_tmp = dϕ_0 / (2*b)
else
# discriminant
d = max(b^2 - 3*a*dϕ_0, Tα(0))
# quadratic equation root
α_tmp = (-b + sqrt(d)) / (3*a)
end
end

α_1 = α_2

α_tmp = NaNMath.min(α_tmp, α_2*ρ_hi) # avoid too small reductions
α_2 = NaNMath.max(α_tmp, α_2*ρ_lo) # avoid too big reductions

# Evaluate f(x) at proposed position
ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
pushcache!(cache, α_2, ϕx_1)
end

return α_2, ϕx_1
@unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
emptycache!(cache)
pushcache!(cache, 0, ϕ_0, dϕ_0) # backtracking doesn't use the slope except here

iterfinitemax = -log2(eps(real(Tα)))

@assert order in (2, 3)
# Check the input is valid, and modify otherwise
#backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
#if c_1 >= backtrack_condition
# warn("""The Armijo constant c_1 is too large; replacing it with
# $(backtrack_condition)""")
# c_1 = backtrack_condition
#end

# Count the total number of iterations
iteration = 0

ϕx_0, ϕx_1 = ϕ_0, ϕ_0

α_1, α_2 = αinitial, αinitial

ϕx_1 = ϕ(α_1)

# Hard-coded backtrack until we find a finite function value
iterfinite = 0
while !isfinite(ϕx_1) && iterfinite < iterfinitemax
iterfinite += 1
α_1 = α_2
α_2 = α_1 / 2

ϕx_1 = ϕ(α_2)
end
pushcache!(cache, αinitial, ϕx_1)


if !isfinite(ϕx_1)
error("Could not find a α, where the function is finite")
end

# Backtrack until we satisfy sufficient decrease condition
while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
# Increment the number of steps we've had to perform
iteration += 1

# Ensure termination
if iteration > iterations
throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
α_2))
end

# Shrink proposed step-size:
if order == 2 || iteration == 1
# backtracking via quadratic interpolation:
# This interpolates the available data
# f(0), f'(0), f(α)
# with a quadractic which is then minimised; this comes with a
# guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
# provided that c_1 < 1/2; the backtrack_condition at the beginning
# of the function guarantees at least a backtracking factor ρ.
α_tmp = -(dϕ_0 * α_2^2) / (2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2))
else
div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
a = (α_1^2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) - α_2^2 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div
b = (-α_1^3 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) + α_2^3 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div

if isapprox(a, zero(a), atol = eps(real(Tα)))
α_tmp = dϕ_0 / (2 * b)
else
# discriminant
d = max(b^2 - 3 * a * dϕ_0, Tα(0))
# quadratic equation root
α_tmp = (-b + sqrt(d)) / (3 * a)
end
end

α_1 = α_2

α_tmp = NaNMath.min(α_tmp, α_2 * ρ_hi) # avoid too small reductions
α_2 = NaNMath.max(α_tmp, α_2 * ρ_lo) # avoid too big reductions

# Evaluate f(x) at proposed position
ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
pushcache!(cache, α_2, ϕx_1)
end

return α_2, ϕx_1
end
32 changes: 14 additions & 18 deletions src/hagerzhang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,14 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
linesearchmax, psi3, display, mayterminate, cache = ls
emptycache!(cache)

zeroT = convert(T, 0)
pushcache!(cache, zeroT, phi_0, dphi_0)
pushcache!(cache, zero(T), phi_0, dphi_0)
if !(isfinite(phi_0) && isfinite(dphi_0))
throw(LineSearchException("Value and slope at step length = 0 must be finite.", T(0)))
end
if dphi_0 >= eps(T) * abs(phi_0)
throw(LineSearchException("Search direction is not a direction of descent.", T(0)))
elseif dphi_0 >= 0
return zeroT, phi_0
return zero(T), phi_0
end

# Prevent values of x_new = x+αs that are likely to make
Expand All @@ -130,7 +129,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
if cache !== nothing
@unpack alphas, values, slopes = cache
else
alphas = [zeroT] # for bisection
alphas = [zero(T)] # for bisection
values = [phi_0]
slopes = [dphi_0]
end
Expand All @@ -141,7 +140,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,

phi_lim = phi_0 + epsilon * abs(phi_0)
@assert c >= 0
c <= eps(T) && return zeroT, phi_0
c <= eps(T) && return zero(T), phi_0
@assert isfinite(c) && c <= alphamax
phi_c, dphi_c = ϕdϕ(c)
iterfinite = 1
Expand All @@ -154,7 +153,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
if !(isfinite(phi_c) && isfinite(dphi_c))
@warn("Failed to achieve finite new evaluation point, using alpha=0")
mayterminate[] = false # reset in case another initial guess is used next
return zeroT, phi_0
return zero(T), phi_0
end
push!(alphas, c)
push!(values, phi_c)
Expand Down Expand Up @@ -185,7 +184,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
", phi_c = ", phi_c,
", dphi_c = ", dphi_c)
end
if dphi_c >= zeroT
if dphi_c >= zero(T)
# We've reached the upward slope, so we have b; examine
# previous values to find a
ib = length(alphas)
Expand All @@ -201,7 +200,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
# have crested over the peak. Use bisection.
ib = length(alphas)
ia = 1
if c ≉ alphas[ib] || slopes[ib] >= zeroT
if c ≉ alphas[ib] || slopes[ib] >= zero(T)
error("c = ", c)
end
# ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
Expand Down Expand Up @@ -360,8 +359,7 @@ function secant2!(ϕdϕ,
dphi_a = slopes[ia]
dphi_b = slopes[ib]
T = eltype(slopes)
zeroT = convert(T, 0)
if !(dphi_a < zeroT && dphi_b >= zeroT)
if !(dphi_a < zero(T) && dphi_b >= zero(T))
error(string("Search direction is not a direction of descent; ",
"this error may indicate that user-provided derivatives are inaccurate. ",
@sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b))
Expand Down Expand Up @@ -445,11 +443,10 @@ function update!(ϕdϕ,
a = alphas[ia]
b = alphas[ib]
T = eltype(slopes)
zeroT = convert(T, 0)
# Debugging (HZ, eq. 4.4):
@assert slopes[ia] < zeroT
@assert slopes[ia] < zero(T)
@assert values[ia] <= phi_lim
@assert slopes[ib] >= zeroT
@assert slopes[ib] >= zero(T)
@assert b > a
c = alphas[ic]
phi_c = values[ic]
Expand All @@ -466,7 +463,7 @@ function update!(ϕdϕ,
if c < a || c > b
return ia, ib #, 0, 0 # it's out of the bracketing interval
end
if dphi_c >= zeroT
if dphi_c >= zero(T)
return ia, ic #, 0, 0 # replace b with a closer point
end
# We know dphi_c < 0. However, phi may not be monotonic between a
Expand Down Expand Up @@ -495,10 +492,9 @@ function bisect!(ϕdϕ,
a = alphas[ia]
b = alphas[ib]
# Debugging (HZ, conditions shown following U3)
zeroT = convert(T, 0)
@assert slopes[ia] < zeroT
@assert slopes[ia] < zero(T)
@assert values[ia] <= phi_lim
@assert slopes[ib] < zeroT # otherwise we wouldn't be here
@assert slopes[ib] < zero(T) # otherwise we wouldn't be here
@assert values[ib] > phi_lim
@assert b > a
while b - a > eps(b)
Expand All @@ -514,7 +510,7 @@ function bisect!(ϕdϕ,
push!(slopes, gphi)

id = length(alphas)
if gphi >= zeroT
if gphi >= zero(T)
return ia, id # replace b, return
end
if phi_d <= phi_lim
Expand Down
9 changes: 4 additions & 5 deletions src/initialguess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,13 @@ function _hzI0(x::AbstractArray{Tx},
f_x::T,
alphamax::T,
psi0::T = convert(T, 1)/100) where {Tx,T}
zeroT = convert(T, 0)
alpha = convert(T, 1)
alpha = one(T)
gr_max = maximum(abs, gr)
if gr_max != zeroT
if gr_max != zero(T)
x_max = maximum(abs, x)
if x_max != zeroT
if x_max != zero(T)
alpha = psi0 * x_max / gr_max
elseif f_x != zeroT
elseif f_x != zero(T)
alpha = psi0 * abs(f_x) / norm(gr)^2
end
end
Expand Down
Loading
Loading