using LineSearches, LinearAlgebra, Test

# Random linear least-squares problem: b is constructed from a known x0,
# so the objective below has a known minimizer.
A = randn(100, 100)
x0 = randn(100)
b = A * x0
# Objective function and gradient (least squares): f(x) = 1/2 ‖Ax - b‖².
f(x) = 0.5 * norm(A * x - b)^2
# In-place gradient: gvec .= A' * (A*x - b); returns gvec.
g!(gvec, x) = (gvec .= A' * (A * x - b))
# Combined evaluation: fills gvec with the gradient and returns f(x).
fg!(gvec, x) = (g!(gvec, x); return f(x))
# Init: starting point scaled by 10 (Float32 literal 1.0f1 promotes to Float64
# when multiplied by the Float64 vector), plus a gradient buffer.
x = 1.0f1 * randn(100)
gv = similar(x)
# Line search setup: initial step length and objective/slope at α = 0.
α0 = 1.0f-3
ϕ0 = fg!(gv, x)   # f(x); also fills gv with ∇f(x)
s = -1 * gv       # search direction: steepest descent
dϕ0 = dot(gv, s)  # directional derivative at α = 0 (negative for a descent direction)
println(ϕ0, ", ", dϕ0)
# Univariate line search functions along the ray x + α*s.
ϕ(α) = f(x .+ α .* s)
# Slope along the search direction: ϕ'(α) = ∇f(x + α s) ⋅ s.
function dϕ(α)
    g!(gv, x .+ α .* s)
    return dot(gv, s)
end
# Value and slope in one combined evaluation (reuses gv as gradient buffer).
function ϕdϕ(α)
    phi = fg!(gv, x .+ α .* s)
    dphi = dot(gv, s)
    return (phi, dphi)
end
# Run a StrongWolfe line search; res is (α, ϕ(α)).
res = (StrongWolfe())(ϕ, dϕ, ϕdϕ, α0, ϕ0, dϕ0)
# The objective is a norm squared, so the accepted value must be positive,
# and the returned value must match a re-evaluation at the accepted step.
@test res[2] > 0
@test res[2] == ϕ(res[1])
@testset "HZ convergence issues" begin
    @testset "Flatness check issues" begin
        # Build a LineSearchTestCase from unsorted sample points: sort by α,
        # then append one extra flat point past the end (same value, zero slope)
        # so the interpolant is defined slightly beyond the last sample.
        function prepare_test_case(; alphas, values, slopes)
            perm = sortperm(alphas)
            alphas = alphas[perm]
            push!(alphas, alphas[end] + 1)
            values = values[perm]
            push!(values, values[end])
            slopes = slopes[perm]
            push!(slopes, 0.0)
            return LineSearchTestCase(alphas, values, slopes)
        end

        # Recorded trace from a failing Hager-Zhang run (flatness check issue).
        tc1 = prepare_test_case(;
            alphas = [0.0, 1.0, 5.0, 3.541670844449739],
            values = [
                3003.592409634743,
                2962.0378569864743,
                2891.4462095232184,
                3000.9760725116876,
            ],
            slopes = [
                -22332.321416890798,
                -20423.214551925797,
                11718.185026267562,
                -22286.821227217057,
            ],
        )

        # Turn a test case into a value-only objective via cubic Hermite
        # interpolation between the bracketing sample points.
        function tc_to_f(tc)
            function f(x)
                i = findfirst(u -> u > x, tc.alphas) - 1
                xk = tc.alphas[i]
                xkp1 = tc.alphas[i+1]
                dx = xkp1 - xk
                t = (x - xk) / dx
                # Hermite basis polynomials on [0, 1].
                h00t = 2t^3 - 3t^2 + 1
                h10t = t * (1 - t)^2
                h01t = t^2 * (3 - 2t)
                h11t = t^2 * (t - 1)
                val =
                    h00t * tc.values[i] +
                    h10t * dx * tc.slopes[i] +
                    h01t * tc.values[i+1] +
                    h11t * dx * tc.slopes[i+1]

                return val
            end
        end
        # Same interpolant, returning (value, slope); slope uses the derivative
        # of the Hermite basis, rescaled by 1/dx for the chain rule.
        function tc_to_fdf(tc)
            function fdf(x)
                i = findfirst(u -> u > x, tc.alphas) - 1
                xk = tc.alphas[i]
                xkp1 = tc.alphas[i+1]
                dx = xkp1 - xk
                t = (x - xk) / dx
                h00t = 2t^3 - 3t^2 + 1
                h10t = t * (1 - t)^2
                h01t = t^2 * (3 - 2t)
                h11t = t^2 * (t - 1)
                val =
                    h00t * tc.values[i] +
                    h10t * dx * tc.slopes[i] +
                    h01t * tc.values[i+1] +
                    h11t * dx * tc.slopes[i+1]

                h00tp = 6t^2 - 6t
                h10tp = 3t^2 - 4t + 1
                h01tp = -6t^2 + 6 * t
                h11tp = 3t^2 - 2t
                slope =
                    (
                        h00tp * tc.values[i] +
                        h10tp * dx * tc.slopes[i] +
                        h01tp * tc.values[i+1] +
                        h11tp * dx * tc.slopes[i+1]
                    ) / dx
                println(x, " ", val, " ", slope)
                return val, slope
            end
        end

        # Run HagerZhang on the interpolated objective, recording evaluations
        # in a cache; returns ((α, ϕ(α)), cache).
        function test_tc(tc, check_flatness)
            cache = LineSearchCache{Float64}()
            hz = HagerZhang(; cache, check_flatness)
            f = tc_to_f(tc)
            fdf = tc_to_fdf(tc)
            hz(f, fdf, 1.0, fdf(0.0)...), cache
        end

        # With the flatness check enabled, HZ returns a non-minimal evaluation.
        res, res_cache = test_tc(tc1, true)
        @show res
        @show res_cache
        @test_broken minimum(res_cache.values) == res[2]

        # With the flatness check disabled, the returned value is the best seen.
        res2, res_cache2 = test_tc(tc1, false)
        @test minimum(res_cache2.values) == res2[2]
        #=
        using AlgebraOfGraphics, CairoMakie
        draw(data((x=0.0:0.05:5.5, y=map(x->tc_to_f(tc1)(x), 0:0.05:5.5)))*mapping(:x,:y)*visual(Scatter)+
        data((alphas=res_cache.alphas, values=res_cache.values))*mapping(:alphas,:values)*visual(Scatter; color=:red))
        =#
    end

    # should add as upstream
    #=
    @testset "from kbarros" begin
        # The minimizer is x0=[0, 2πn/100], with f(x0) = 1. Any integer n is fine.
        function f(x)
            return (x[1]^2 + 1) * (2 - cos(100*x[2]))
        end

        using Optim

        function test_converges(method)
            for i in 1:100
                r = randn(2)
                res = optimize(f, r, method)
                if Optim.converged(res) && minimum(res) > f([0,0]) + 1e-8
                    println("""
                        Incorrectly reported convergence after $(res.iterations) iterations
                        Reached x = $(Optim.minimizer(res)) with f(x) = $(minimum(res))
                        """)
                end
            end
        end

        # Works successfully, no printed output
        test_converges(LBFGS(; linesearch=Optim.LineSearches.BackTracking(order=2)))

        # Prints ~10 failures to converge (in 100 tries). Frequently fails after the
        # first line search.
        test_converges(ConjugateGradient(; linesearch=Optim.LineSearches.HagerZhang(check_flatness=false)))
    end
    =#
end