/**
 * @file
 * @brief Demonstration of 1D optimization using the Armijo backtracking
 * line search method (adaptive step size gradient descent).
 *
 * @author [Frinad Kandoriya](https://github.com/frinad8555)
 * @see https://en.wikipedia.org/wiki/Backtracking_line_search
 */
| 9 | + |
#include <cassert>     // assert
#include <cmath>       // std::abs, std::pow
#include <cstdint>     // uint32_t
#include <functional>  // std::function
#include <iostream>    // std::cout
#include <limits>      // std::numeric_limits
| 16 | + |
/// Convergence tolerance on the gradient magnitude (typed constant
/// instead of a macro so it obeys scope and type rules).
constexpr double EPSILON = 1e-7;
| 18 | + |
/**
 * @brief Perform Armijo backtracking line search for a 1D function.
 *
 * The trial step is repeatedly shrunk by @p tau until the Armijo
 * (sufficient decrease) condition is satisfied:
 * \f[
 * f(x + \alpha d) \le f(x) + c \alpha f'(x)d
 * \f]
 *
 * @param f the function to minimize
 * @param df first derivative of f
 * @param xk current position
 * @param dk descent direction (should satisfy f'(xk) * dk < 0)
 * @param c Armijo constant (usually 1e-4)
 * @param tau backtracking reduction factor in (0, 1) (usually 0.5)
 * @param alpha_init initial trial step size
 * @return step length satisfying the Armijo condition, or the smallest
 *         trial step reached if the condition could not be met
 */
double armijo_backtrack(const std::function<double(double)> &f,
                        const std::function<double(double)> &df, double xk,
                        double dk, double c = 1e-4, double tau = 0.5,
                        double alpha_init = 1.0) {
    double alpha = alpha_init;
    const double fxk = f(xk);
    const double grad_fxk = df(xk);

    // Lower bound on the trial step. Without it, the loop never terminates
    // when dk is not a descent direction (grad_fxk * dk >= 0, so the Armijo
    // condition can never hold) or when alpha underflows toward zero.
    const double alpha_min = 16.0 * std::numeric_limits<double>::epsilon();

    while (f(xk + alpha * dk) > fxk + c * alpha * grad_fxk * dk &&
           alpha > alpha_min) {
        alpha *= tau;  // sufficient decrease not met: shrink the step
    }

    return alpha;
}
| 51 | + |
| 52 | +/** |
| 53 | + * @brief Perform gradient descent using Armijo line search on a 1D function. |
| 54 | + * |
| 55 | + * @param f target function |
| 56 | + * @param df first derivative of f |
| 57 | + * @param x0 initial guess |
| 58 | + * @param max_iter maximum number of iterations |
| 59 | + * @return estimated minimum point |
| 60 | + */ |
| 61 | + |
| 62 | +double gradient_descent_armijo(const std::function<double(double)> &f, |
| 63 | + const std::function<double(double)> &df, |
| 64 | + double x0, uint32_t max_iter = 50) { |
| 65 | + double x = x0; |
| 66 | + double grad = df(x); |
| 67 | + double d = -grad; |
| 68 | + int iter = 0; |
| 69 | + |
| 70 | + while (std::abs(grad) > EPSILON && iter < max_iter) { |
| 71 | + double alpha = armijo_backtrack(f, df, x, d); |
| 72 | + x += alpha * d; |
| 73 | + grad = df(x); |
| 74 | + d = -grad; |
| 75 | + std::cout << "Iter " << iter << ": x = " << x << ", f(x) = " << f(x) |
| 76 | + << ", alpha = " << alpha << "\n"; |
| 77 | + iter++; |
| 78 | + } |
| 79 | + |
| 80 | + std::cout << "Converged in " << iter << " iterations.\n"; |
| 81 | + return x; |
| 82 | +} |
| 83 | + |
| 84 | +/** |
| 85 | + * @brief Test case 1: minimize f(x) = (x - 2)^2 |
| 86 | + * \n Expected minimum at x = 2 |
| 87 | + */ |
| 88 | + |
| 89 | +void test1() { |
| 90 | + std::cout << "Test 1... "; |
| 91 | + |
| 92 | + std::function<double(double)> f = [](double x) { return pow(x - 2.0, 2); }; |
| 93 | + std::function<double(double)> df = [](double x) { return 2.0 * (x - 2.0); }; |
| 94 | + |
| 95 | + double xmin = gradient_descent_armijo(f, df, 0.0); |
| 96 | + |
| 97 | + std::cout << "Result = " << xmin << "\n"; |
| 98 | + assert(std::abs(xmin - 2.0) < 1e-5); |
| 99 | + std::cout << "Passed.\n"; |
| 100 | + std::cout << "\n"; |
| 101 | +} |
| 102 | + |
| 103 | +/** |
| 104 | + * @brief Test case 2: minimize f(x) = (x + 3)^2 + 1 |
| 105 | + * \n Expected minimum at x = -3 |
| 106 | + */ |
| 107 | + |
| 108 | +void test2() { |
| 109 | + std::cout << "Test 2... "; |
| 110 | + |
| 111 | + std::function<double(double)> f = [](double x) { return pow(x + 3.0, 2) + 1; }; |
| 112 | + std::function<double(double)> df = [](double x) { return 2.0 * (x + 3.0); }; |
| 113 | + |
| 114 | + double xmin = gradient_descent_armijo(f, df, 5.0); |
| 115 | + |
| 116 | + std::cout << "Result = " << xmin << "\n"; |
| 117 | + assert(std::abs(xmin + 3.0) < 1e-5); |
| 118 | + std::cout << "Passed.\n"; |
| 119 | + std::cout << "\n"; |
| 120 | +} |
| 121 | + |
| 122 | +/** |
| 123 | + * @brief Test case 3: minimize non-convex f(x) = x^4 - 3x^3 + 2 |
| 124 | + * \n Expected local minimum near x ≈ 2.25 |
| 125 | + */ |
| 126 | + |
| 127 | +void test3() { |
| 128 | + std::cout << "Test 3... "; |
| 129 | + |
| 130 | + std::function<double(double)> f = [](double x) { |
| 131 | + return pow(x, 4) - 3 * pow(x, 3) + 2; |
| 132 | + }; |
| 133 | + std::function<double(double)> df = [](double x) { |
| 134 | + return 4 * pow(x, 3) - 9 * pow(x, 2); |
| 135 | + }; |
| 136 | + |
| 137 | + double xmin = gradient_descent_armijo(f, df, 1.0); |
| 138 | + |
| 139 | + std::cout << "Result = " << xmin << "\n"; |
| 140 | + std::cout << "Passed.\n"; |
| 141 | + std::cout << "\n"; |
| 142 | +} |
| 143 | + |
| 144 | +/** @brief Main function */ |
| 145 | +int main() { |
| 146 | + std::cout.precision(9); |
| 147 | + std::cout << "Armijo Backtracking Line Search Example\n"; |
| 148 | + std::cout << "Machine epsilon: " << EPSILON << "\n\n"; |
| 149 | + |
| 150 | + test1(); |
| 151 | + test2(); |
| 152 | + test3(); |
| 153 | + |
| 154 | + return 0; |
| 155 | +} |