/*************************************************************************
 *
 * Project
 *                          _____ _____  __  __ _____
 *                         / ____|  __ \|  \/  |  __ \
 *    ___  _ __   ___ _ __| |  __| |__) | \  / | |__) |
 *   / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *  | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *   \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *        | |
 *        |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms,
 * among other details, are described in the official documentation at
 * https://akielaries.github.io/openGPMP/ and in the relevant files of
 * this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute,
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <algorithm>
#include <cmath>
#include <functional>
#include <iostream>
#include <numeric>
#include <openGPMP/optim/quasi.hpp>
#include <stdexcept>
#include <tuple>
#include <vector>

std::vector<double> gpmp::optim::QuasiNewton::vector_subtraction(
    const std::vector<double> &a,
    const std::vector<double> &b) const {
    if (a.size() != b.size()) {
        throw std::invalid_argument(
            "Error: Vector dimensions do not match for subtraction.");
    }

    std::vector<double> result;
    result.reserve(a.size());

    for (size_t i = 0; i < a.size(); ++i) {
        result.push_back(a[i] - b[i]);
    }

    return result;
}

std::vector<double> gpmp::optim::QuasiNewton::bhhh_optimize(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations) {
    std::vector<double> current_point = initial_point;
    size_t n = initial_point.size();

    for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
        // Calculate the gradient
        std::vector<double> gradient =
            calculate_gradient(func, current_point, 1e-6);

        // Check convergence against the Euclidean norm of the gradient
        double gradient_norm = 0.0;
        for (size_t i = 0; i < n; ++i) {
            gradient_norm += gradient[i] * gradient[i];
        }
        gradient_norm = std::sqrt(gradient_norm);

        if (gradient_norm < tolerance) {
            std::cout << "Converged after " << iteration << " iterations."
                      << std::endl;
            return current_point;
        }

        // Calculate the BHHH matrix
        std::vector<std::vector<double>> bhhh_matrix =
            calculate_bhhh_matrix(gradient);

        // Update the current point
        current_point = update_point(current_point, gradient, bhhh_matrix);
    }

    std::cout << "Reached maximum iterations without convergence." << std::endl;
    return current_point;
}
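
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp). BHHH is
 * typically applied to log-likelihood-style objectives; a simple
 * quadratic is used here only to show the call signature:
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto objective = [](const std::vector<double> &v) {
 *       return (v[0] - 2.0) * (v[0] - 2.0) + (v[1] - 3.0) * (v[1] - 3.0);
 *   };
 *   std::vector<double> start = {0.0, 0.0};
 *   std::vector<double> result =
 *       qn.bhhh_optimize(objective, start, 1e-6, 1000);
 */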

std::vector<double> gpmp::optim::QuasiNewton::calculate_gradient(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &point,
    double epsilon) {
    size_t n = point.size();
    std::vector<double> gradient(n);

    // Evaluate the objective once at the base point; only the perturbed
    // evaluation changes per coordinate
    double original_value = func(point);

    for (size_t i = 0; i < n; ++i) {
        std::vector<double> perturbed_point = point;
        perturbed_point[i] += epsilon;

        double perturbed_value = func(perturbed_point);

        // Forward-difference approximation of the i-th partial derivative
        gradient[i] = (perturbed_value - original_value) / epsilon;
    }

    return gradient;
}
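
/* A quick numeric check of the forward difference above (illustrative):
 * for f(x) = x^2 at x = 1 with epsilon = 1e-6,
 *
 *   ((1 + 1e-6)^2 - 1) / 1e-6 = 2 + 1e-6 = 2.000001,
 *
 * versus the exact derivative 2. The truncation error is O(epsilon); a
 * central difference, (f(x + eps) - f(x - eps)) / (2 * eps), would reduce
 * it to O(epsilon^2) at the cost of one extra evaluation per coordinate.
 */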

std::vector<std::vector<double>>
gpmp::optim::QuasiNewton::calculate_bhhh_matrix(
    const std::vector<double> &gradient) {
    size_t n = gradient.size();
    std::vector<std::vector<double>> bhhh_matrix(n, std::vector<double>(n));

    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < n; ++j) {
            bhhh_matrix[i][j] = gradient[i] * gradient[j];
        }
    }

    return bhhh_matrix;
}
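
/* The matrix built above is the outer product g * g^T. For example
 * (illustrative), with gradient g = (1, 2):
 *
 *   B = | 1*1  1*2 |  =  | 1  2 |
 *       | 2*1  2*2 |     | 2  4 |
 *
 * In the BHHH method proper, this outer product is summed over
 * per-observation score vectors to approximate the Hessian of a
 * log-likelihood; here a single gradient vector is used.
 */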

std::vector<double> gpmp::optim::QuasiNewton::update_point(
    const std::vector<double> &current_point,
    const std::vector<double> &gradient,
    const std::vector<std::vector<double>> &bhhh_matrix) {
    size_t n = current_point.size();
    std::vector<double> updated_point(n);

    for (size_t i = 0; i < n; ++i) {
        // Diagonal-only Newton-style step; skip coordinates where the
        // diagonal entry vanishes (gradient[i] == 0) to avoid 0/0
        if (bhhh_matrix[i][i] != 0.0) {
            updated_point[i] =
                current_point[i] - gradient[i] / bhhh_matrix[i][i];
        } else {
            updated_point[i] = current_point[i];
        }
    }

    return updated_point;
}

std::vector<double> gpmp::optim::QuasiNewton::bfgs_optimize(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations) {
    std::vector<double> current_point = initial_point;
    size_t n = initial_point.size();

    // Initialize the inverse Hessian approximation as the identity matrix
    std::vector<std::vector<double>> hessian_inverse(
        n,
        std::vector<double>(n, 0.0));
    for (size_t i = 0; i < n; ++i) {
        hessian_inverse[i][i] = 1.0;
    }

    for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
        // Calculate the gradient
        std::vector<double> gradient =
            calculate_gradient(func, current_point, 1e-6);

        // Check convergence
        double gradient_norm = 0.0;
        for (size_t i = 0; i < n; ++i) {
            gradient_norm += gradient[i] * gradient[i];
        }
        gradient_norm = std::sqrt(gradient_norm);

        if (gradient_norm < tolerance) {
            std::cout << "Converged after " << iteration << " iterations."
                      << std::endl;
            return current_point;
        }

        // Calculate the search direction
        std::vector<double> search_direction =
            calculate_search_direction(gradient, hessian_inverse);

        // Perform a line search to find the step size
        double step_size = line_search(func, current_point, search_direction);

        // Update the current point
        std::vector<double> next_point =
            update_point(current_point, search_direction, step_size);

        // Update the inverse Hessian approximation
        std::vector<double> gradient_difference =
            calculate_gradient_difference(next_point, current_point, gradient);
        hessian_inverse = update_hessian_inverse(hessian_inverse,
                                                 gradient_difference,
                                                 search_direction);

        // Move to the next iteration
        current_point = next_point;
    }

    std::cout << "Reached maximum iterations without convergence." << std::endl;
    return current_point;
}
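
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp): minimizing
 * the Rosenbrock function from a standard starting point.
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto rosenbrock = [](const std::vector<double> &v) {
 *       double a = 1.0 - v[0];
 *       double b = v[1] - v[0] * v[0];
 *       return a * a + 100.0 * b * b;
 *   };
 *   std::vector<double> x0 = {-1.2, 1.0};
 *   std::vector<double> xmin =
 *       qn.bfgs_optimize(rosenbrock, x0, 1e-6, 10000);
 *   // xmin should approach the global minimum at {1.0, 1.0}
 */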

std::vector<double> gpmp::optim::QuasiNewton::calculate_search_direction(
    const std::vector<double> &gradient,
    const std::vector<std::vector<double>> &hessian_inverse) {
    size_t n = gradient.size();
    std::vector<double> search_direction(n);

    for (size_t i = 0; i < n; ++i) {
        search_direction[i] = 0.0;
        for (size_t j = 0; j < n; ++j) {
            search_direction[i] -= hessian_inverse[i][j] * gradient[j];
        }
    }

    return search_direction;
}
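
/* The loop above computes the quasi-Newton direction
 *
 *   d = -H_inv * g,
 *
 * i.e. a matrix-vector product followed by negation. With H_inv equal to
 * the identity (the initialization in bfgs_optimize), d reduces to the
 * steepest-descent direction -g.
 */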

double gpmp::optim::QuasiNewton::line_search(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &current_point,
    const std::vector<double> &search_direction) {
    const double alpha = 0.001;        // Armijo sufficient-decrease constant
    const double beta = 0.5;           // Factor for reducing the step size
    const int max_iterations = 100;    // Maximum number of backtracking steps
    const double min_step_size = 1e-6; // Minimum step size

    double step_size = 1.0; // Initial step size
    std::vector<double> updated_point = current_point;

    // Evaluate the objective function at the current point
    double f_current = func(current_point);

    // Calculate the directional derivative (gradient dot search_direction)
    double directional_derivative =
        dot_product(calculate_gradient(func, current_point, 1e-6),
                    search_direction);

    int iteration = 0;
    while (step_size > min_step_size && iteration < max_iterations) {
        updated_point =
            update_point(current_point, search_direction, step_size);
        double f_updated = func(updated_point);
        if (f_updated <=
            f_current + alpha * step_size * directional_derivative) {
            break; // Stop if the Armijo condition is satisfied
        }
        step_size *= beta; // Reduce the step size
        ++iteration;
    }

    return step_size;
}
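
/* Backtracking sketch (illustrative): the Armijo condition accepts the
 * first step t in {1, 0.5, 0.25, ...} satisfying
 *
 *   f(x + t * d) <= f(x) + alpha * t * (grad f(x) . d).
 *
 * For f(x) = x^2 at x = 1 with d = -2 (steepest descent), f(x) = 1 and
 * grad f . d = -4, so t = 1 gives f(-1) = 1 > 1 - 0.001 * 4 = 0.996 and
 * is rejected, while t = 0.5 gives f(0) = 0 <= 0.998 and is accepted.
 */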

std::vector<double> gpmp::optim::QuasiNewton::update_point(
    const std::vector<double> &current_point,
    const std::vector<double> &search_direction,
    double step_size) {
    size_t n = current_point.size();
    std::vector<double> updated_point(n);

    for (size_t i = 0; i < n; ++i) {
        updated_point[i] = current_point[i] + step_size * search_direction[i];
    }

    return updated_point;
}

std::vector<double> gpmp::optim::QuasiNewton::calculate_gradient_difference(
    const std::vector<double> &next_point,
    const std::vector<double> &current_point,
    const std::vector<double> &gradient) {
    size_t n = next_point.size();
    std::vector<double> gradient_difference(n);

    // Note: this scales the current gradient by the step taken rather than
    // re-evaluating the gradient at next_point, so it only approximates the
    // true difference y_k = grad f(x_{k+1}) - grad f(x_k)
    for (size_t i = 0; i < n; ++i) {
        gradient_difference[i] =
            gradient[i] * (next_point[i] - current_point[i]);
    }

    return gradient_difference;
}

std::vector<std::vector<double>>
gpmp::optim::QuasiNewton::update_hessian_inverse(
    const std::vector<std::vector<double>> &hessian_inverse,
    const std::vector<double> &gradient_difference,
    const std::vector<double> &search_direction) {

    size_t n = hessian_inverse.size();
    std::vector<std::vector<double>> updated_hessian_inverse(
        n,
        std::vector<double>(n));

    // Simplified rank-one correction scaled by (y . d); note this is not
    // the full BFGS inverse update (see the reference sketch below)
    double rho = dot_product(gradient_difference, search_direction);

    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < n; ++j) {
            updated_hessian_inverse[i][j] =
                hessian_inverse[i][j] +
                rho * gradient_difference[i] * gradient_difference[j];
        }
    }

    return updated_hessian_inverse;
}
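
/* For reference, the standard BFGS inverse update is, with step s,
 * gradient difference y, and rho = 1 / (y^T s):
 *
 *   H_{k+1} = (I - rho * s * y^T) * H_k * (I - rho * y * s^T)
 *             + rho * s * s^T
 *
 * A minimal element-wise sketch of that formula (illustrative, assuming
 * H_k is symmetric, s and y have matching sizes, y^T s > 0, and Hy
 * denotes the product H_k * y):
 *
 *   double rho = 1.0 / dot_product(y, s);
 *   double yHy = dot_product(y, Hy);
 *   H_next[i][j] = H[i][j]
 *       - rho * (s[i] * Hy[j] + Hy[i] * s[j])
 *       + (rho * rho * yHy + rho) * s[i] * s[j];
 */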

double gpmp::optim::QuasiNewton::dot_product(const std::vector<double> &a,
                                             const std::vector<double> &b) {
    // Ensure vectors have the same size
    if (a.size() != b.size()) {
        throw std::invalid_argument(
            "Vectors must have the same size for dot product.");
    }

    double result = 0.0;
    for (size_t i = 0; i < a.size(); ++i) {
        result += a[i] * b[i];
    }
    return result;
}

std::tuple<std::vector<double>, double>
gpmp::optim::QuasiNewton::lbfgs_optimize(
    const std::function<double(const std::vector<double> &)> &f,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations,
    size_t memory_size) {

    const double eps = 1e-8;

    size_t n = initial_point.size();
    std::vector<double> x = initial_point;
    // s vectors: differences between successive iterates
    std::vector<std::vector<double>> s(memory_size, std::vector<double>(n));
    // y vectors: differences between successive gradients
    std::vector<std::vector<double>> y(memory_size, std::vector<double>(n));
    // rho values: 1 / (y . s) per stored correction pair
    std::vector<double> rho(memory_size);

    // Evaluate the objective function and its gradient at initial_point
    double fx = f(x);
    std::vector<double> g = calculate_gradient(f, x, 1e-6);

    for (size_t iter = 0; iter < max_iterations; ++iter) {
        // Check for convergence on the gradient norm
        double norm_grad = 0.0;
        for (size_t i = 0; i < n; ++i) {
            norm_grad += g[i] * g[i];
        }
        norm_grad = std::sqrt(norm_grad);
        if (norm_grad < tolerance) {
            break;
        }

        // Compute the search direction via the L-BFGS two-loop recursion,
        // seeded with the steepest-descent direction -g
        std::vector<double> d(n);
        for (size_t j = 0; j < n; ++j) {
            d[j] = -g[j];
        }

        // Number of stored correction pairs available so far. Note: pairs
        // are traversed in storage order, which matches recency order only
        // until the circular buffer wraps
        size_t history = std::min(iter, memory_size);
        std::vector<double> alpha(history);

        // First loop: newest to oldest correction pairs
        for (size_t k = history; k > 0; --k) {
            size_t i = k - 1;
            rho[i] = 1.0 / std::inner_product(s[i].begin(),
                                              s[i].end(),
                                              y[i].begin(),
                                              0.0);
            alpha[i] = rho[i] * std::inner_product(s[i].begin(),
                                                   s[i].end(),
                                                   d.begin(),
                                                   0.0);
            for (size_t j = 0; j < n; ++j) {
                d[j] -= alpha[i] * y[i][j];
            }
        }

        // Second loop: oldest to newest (identity initial Hessian, so no
        // extra scaling is applied between the two loops)
        for (size_t i = 0; i < history; ++i) {
            double beta = rho[i] * std::inner_product(y[i].begin(),
                                                      y[i].end(),
                                                      d.begin(),
                                                      0.0);
            for (size_t j = 0; j < n; ++j) {
                d[j] += (alpha[i] - beta) * s[i][j];
            }
        }

        // Directional derivative of f along d; require a descent direction
        double dg = std::inner_product(d.begin(), d.end(), g.begin(), 0.0);
        if (dg > 0) {
            break;
        }

        // Trial point with a fixed unit step and a sufficient-decrease
        // test; a rejected trial leaves the state unchanged
        double step_size = 1.0;
        std::vector<double> x_new = x;
        for (size_t i = 0; i < n; ++i) {
            x_new[i] += step_size * d[i];
        }

        double fx_new = f(x_new);
        if (fx_new < fx + eps * step_size * dg) {
            // Evaluate the gradient at the accepted point
            std::vector<double> g_new = calculate_gradient(f, x_new, 1e-6);

            // Record the correction pair before overwriting x and g
            for (size_t i = 0; i < n; ++i) {
                s[iter % memory_size][i] = x_new[i] - x[i];
                y[iter % memory_size][i] = g_new[i] - g[i];
            }

            x = x_new;
            fx = fx_new;
            g = g_new;
        }
    }

    return std::make_tuple(x, fx);
}
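
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp):
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto sphere = [](const std::vector<double> &v) {
 *       double sum = 0.0;
 *       for (double vi : v) {
 *           sum += vi * vi;
 *       }
 *       return sum;
 *   };
 *   std::vector<double> x0 = {4.0, -3.0};
 *   std::tuple<std::vector<double>, double> result =
 *       qn.lbfgs_optimize(sphere, x0, 1e-6, 1000, 10);
 *   // std::get<0>(result) should approach {0.0, 0.0};
 *   // std::get<1>(result) should be near 0.0
 */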