Skip to content

Commit d34d5b5

Browse files
committed
Add Armijo Backtracking Line Search Algorithm
1 parent b9c118f commit d34d5b5

File tree

1 file changed

+155
-0
lines changed

1 file changed

+155
-0
lines changed
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/**
2+
* @file
3+
* @brief Demonstration of 1D optimization using the Armijo backtracking
4+
* line search method (adaptive step size gradient descent).
5+
*
6+
* @author [Frinad Kandoriya](https://github.com/frinad8555)
7+
* @see https://en.wikipedia.org/wiki/Backtracking_line_search
8+
*/
9+
10+
#include <cassert>
11+
#include <cmath>
12+
#include <cstdint>
13+
#include <functional>
14+
#include <iostream>
15+
#include <limits>
16+
17+
/// Convergence tolerance on |f'(x)| for gradient descent.
/// constexpr instead of #define: typed, scoped, debugger-visible.
constexpr double EPSILON = 1e-7;
18+
19+
/**
 * @brief Perform Armijo backtracking line search for a 1D function.
 *
 * The method adaptively reduces the step size until the Armijo
 * (sufficient decrease) condition is satisfied:
 * \f[
 * f(x + \alpha d) \le f(x) + c \alpha f'(x)d
 * \f]
 *
 * @param f the function to minimize
 * @param df first derivative of f
 * @param xk current position
 * @param dk descent direction
 * @param c Armijo constant (usually 1e-4)
 * @param tau backtracking reduction factor in (0, 1) (usually 0.5)
 * @param alpha_init initial trial step size
 * @return step length satisfying the Armijo condition, or the smallest
 *         positive normal double if the condition could not be satisfied
 *         (e.g. when dk is not a descent direction)
 */
double armijo_backtrack(const std::function<double(double)> &f,
                        const std::function<double(double)> &df, double xk,
                        double dk, double c = 1e-4, double tau = 0.5,
                        double alpha_init = 1.0) {
    double alpha = alpha_init;
    const double fxk = f(xk);
    const double slope = df(xk) * dk;  // directional derivative f'(xk)*dk

    // Shrink alpha geometrically until sufficient decrease holds.
    // The alpha > numeric_limits::min() guard prevents the loop from
    // halving alpha ~1000 times down through subnormals to zero when dk
    // is not a descent direction and the condition can never be met.
    while (alpha > std::numeric_limits<double>::min() &&
           f(xk + alpha * dk) > fxk + c * alpha * slope) {
        alpha *= tau;  // reduce step size
    }

    return alpha;
}
51+
52+
/**
53+
* @brief Perform gradient descent using Armijo line search on a 1D function.
54+
*
55+
* @param f target function
56+
* @param df first derivative of f
57+
* @param x0 initial guess
58+
* @param max_iter maximum number of iterations
59+
* @return estimated minimum point
60+
*/
61+
62+
double gradient_descent_armijo(const std::function<double(double)> &f,
63+
const std::function<double(double)> &df,
64+
double x0, uint32_t max_iter = 50) {
65+
double x = x0;
66+
double grad = df(x);
67+
double d = -grad;
68+
int iter = 0;
69+
70+
while (std::abs(grad) > EPSILON && iter < max_iter) {
71+
double alpha = armijo_backtrack(f, df, x, d);
72+
x += alpha * d;
73+
grad = df(x);
74+
d = -grad;
75+
std::cout << "Iter " << iter << ": x = " << x << ", f(x) = " << f(x)
76+
<< ", alpha = " << alpha << "\n";
77+
iter++;
78+
}
79+
80+
std::cout << "Converged in " << iter << " iterations.\n";
81+
return x;
82+
}
83+
84+
/**
85+
* @brief Test case 1: minimize f(x) = (x - 2)^2
86+
* \n Expected minimum at x = 2
87+
*/
88+
89+
void test1() {
90+
std::cout << "Test 1... ";
91+
92+
std::function<double(double)> f = [](double x) { return pow(x - 2.0, 2); };
93+
std::function<double(double)> df = [](double x) { return 2.0 * (x - 2.0); };
94+
95+
double xmin = gradient_descent_armijo(f, df, 0.0);
96+
97+
std::cout << "Result = " << xmin << "\n";
98+
assert(std::abs(xmin - 2.0) < 1e-5);
99+
std::cout << "Passed.\n";
100+
std::cout << "\n";
101+
}
102+
103+
/**
104+
* @brief Test case 2: minimize f(x) = (x + 3)^2 + 1
105+
* \n Expected minimum at x = -3
106+
*/
107+
108+
void test2() {
109+
std::cout << "Test 2... ";
110+
111+
std::function<double(double)> f = [](double x) { return pow(x + 3.0, 2) + 1; };
112+
std::function<double(double)> df = [](double x) { return 2.0 * (x + 3.0); };
113+
114+
double xmin = gradient_descent_armijo(f, df, 5.0);
115+
116+
std::cout << "Result = " << xmin << "\n";
117+
assert(std::abs(xmin + 3.0) < 1e-5);
118+
std::cout << "Passed.\n";
119+
std::cout << "\n";
120+
}
121+
122+
/**
123+
* @brief Test case 3: minimize non-convex f(x) = x^4 - 3x^3 + 2
124+
* \n Expected local minimum near x ≈ 2.25
125+
*/
126+
127+
void test3() {
128+
std::cout << "Test 3... ";
129+
130+
std::function<double(double)> f = [](double x) {
131+
return pow(x, 4) - 3 * pow(x, 3) + 2;
132+
};
133+
std::function<double(double)> df = [](double x) {
134+
return 4 * pow(x, 3) - 9 * pow(x, 2);
135+
};
136+
137+
double xmin = gradient_descent_armijo(f, df, 1.0);
138+
139+
std::cout << "Result = " << xmin << "\n";
140+
std::cout << "Passed.\n";
141+
std::cout << "\n";
142+
}
143+
144+
/** @brief Main function */
145+
int main() {
146+
std::cout.precision(9);
147+
std::cout << "Armijo Backtracking Line Search Example\n";
148+
std::cout << "Machine epsilon: " << EPSILON << "\n\n";
149+
150+
test1();
151+
test2();
152+
test3();
153+
154+
return 0;
155+
}

0 commit comments

Comments
 (0)