openGPMP
Open Source Mathematics Package
quasi.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file.
25  *
26  *
27  *
28  * This software is distributed on an AS IS basis, WITHOUT
29  * WARRANTY OF ANY KIND, either express or implied.
30  *
31  ************************************************************************/
#include <cmath>
#include <functional>
#include <iostream>
#include <numeric>
#include <openGPMP/optim/quasi.hpp>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>
39 
41  const std::vector<double> &a,
42  const std::vector<double> &b) const {
43  if (a.size() != b.size()) {
44  throw std::invalid_argument(
45  "Error: Vector dimensions do not match for subtraction.");
46  }
47 
48  std::vector<double> result;
49  result.reserve(a.size());
50 
51  for (size_t i = 0; i < a.size(); ++i) {
52  result.push_back(a[i] - b[i]);
53  }
54 
55  return result;
56 }
57 
59  const std::function<double(const std::vector<double> &)> &func,
60  const std::vector<double> &initial_point,
61  double tolerance,
62  size_t max_iterations) {
63  std::vector<double> current_point = initial_point;
64  size_t n = initial_point.size();
65 
66  for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
67  // Calculate the gradient
68  std::vector<double> gradient =
69  calculate_gradient(func, current_point, 1e-6);
70 
71  // Check convergence
72  double gradient_norm = 0.0;
73  for (size_t i = 0; i < n; ++i) {
74  gradient_norm += gradient[i] * gradient[i];
75  }
76  gradient_norm = std::sqrt(gradient_norm);
77 
78  if (gradient_norm < tolerance) {
79  std::cout << "Converged after " << iteration << " iterations."
80  << std::endl;
81  return current_point;
82  }
83 
84  // Calculate the BHHH matrix
85  std::vector<std::vector<double>> bhhh_matrix =
86  calculate_bhhh_matrix(gradient);
87 
88  // Update the current point
89  current_point = update_point(current_point, gradient, bhhh_matrix);
90  }
91 
92  std::cout << "Reached maximum iterations without convergence." << std::endl;
93  return current_point;
94 }
95 
97  const std::function<double(const std::vector<double> &)> &func,
98  const std::vector<double> &point,
99  double epsilon) {
100  size_t n = point.size();
101  std::vector<double> gradient(n);
102 
103  for (size_t i = 0; i < n; ++i) {
104  std::vector<double> perturbed_point = point;
105  perturbed_point[i] += epsilon;
106 
107  double perturbed_value = func(perturbed_point);
108  double original_value = func(point);
109 
110  gradient[i] = (perturbed_value - original_value) / epsilon;
111  }
112 
113  return gradient;
114 }
115 
116 std::vector<std::vector<double>>
118  const std::vector<double> &gradient) {
119  size_t n = gradient.size();
120  std::vector<std::vector<double>> bhhh_matrix(n, std::vector<double>(n));
121 
122  for (size_t i = 0; i < n; ++i) {
123  for (size_t j = 0; j < n; ++j) {
124  bhhh_matrix[i][j] = gradient[i] * gradient[j];
125  }
126  }
127 
128  return bhhh_matrix;
129 }
130 
132  const std::vector<double> &current_point,
133  const std::vector<double> &gradient,
134  const std::vector<std::vector<double>> &bhhh_matrix) {
135  size_t n = current_point.size();
136  std::vector<double> updated_point(n);
137 
138  for (size_t i = 0; i < n; ++i) {
139  updated_point[i] = current_point[i] - gradient[i] / bhhh_matrix[i][i];
140  }
141 
142  return updated_point;
143 }
144 
146  const std::function<double(const std::vector<double> &)> &func,
147  const std::vector<double> &initial_point,
148  double tolerance,
149  size_t max_iterations) {
150  std::vector<double> current_point = initial_point;
151  size_t n = initial_point.size();
152 
153  // Initialize Hessian approximation as the identity matrix
154  std::vector<std::vector<double>> hessian_inverse(
155  n,
156  std::vector<double>(n, 0.0));
157  for (size_t i = 0; i < n; ++i) {
158  hessian_inverse[i][i] = 1.0;
159  }
160 
161  for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
162  // Calculate the gradient
163  std::vector<double> gradient =
164  calculate_gradient(func, current_point, 1e-6);
165 
166  // Check convergence
167  double gradient_norm = 0.0;
168  for (size_t i = 0; i < n; ++i) {
169  gradient_norm += gradient[i] * gradient[i];
170  }
171  gradient_norm = std::sqrt(gradient_norm);
172 
173  if (gradient_norm < tolerance) {
174  std::cout << "Converged after " << iteration << " iterations."
175  << std::endl;
176  return current_point;
177  }
178 
179  // Calculate search direction
180  std::vector<double> search_direction =
181  calculate_search_direction(gradient, hessian_inverse);
182 
183  // Perform line search to find the step size
184  double step_size = line_search(func, current_point, search_direction);
185 
186  // Update the current point
187  std::vector<double> next_point =
188  update_point(current_point, search_direction, step_size);
189 
190  // Update the Hessian approximation
191  std::vector<double> gradient_difference =
192  calculate_gradient_difference(next_point, current_point, gradient);
193  hessian_inverse = update_hessian_inverse(hessian_inverse,
194  gradient_difference,
195  search_direction);
196 
197  // Move to the next iteration
198  current_point = next_point;
199  }
200 
201  std::cout << "Reached maximum iterations without convergence." << std::endl;
202  return current_point;
203 }
204 
206  const std::vector<double> &gradient,
207  const std::vector<std::vector<double>> &hessian_inverse) {
208  size_t n = gradient.size();
209  std::vector<double> search_direction(n);
210 
211  for (size_t i = 0; i < n; ++i) {
212  search_direction[i] = 0.0;
213  for (size_t j = 0; j < n; ++j) {
214  search_direction[i] -= hessian_inverse[i][j] * gradient[j];
215  }
216  }
217 
218  return search_direction;
219 }
220 
222  const std::function<double(const std::vector<double> &)> &func,
223  const std::vector<double> &current_point,
224  const std::vector<double> &search_direction) {
225  const double alpha = 0.001; // Step size multiplier
226  const double beta = 0.5; // Factor for reducing the step size
227  const int maxIterations = 100; // Maximum number of iterations
228  const double minStepSize = 1e-6; // Minimum step size
229 
230  double step_size = 1.0; // Initial step size
231  std::vector<double> updated_point = current_point;
232 
233  // Evaluate the objective function at the current point
234  double f_current = func(current_point);
235 
236  // Calculate the directional derivative (gradient dot search_direction)
237  double directional_derivative =
238  dot_product(calculate_gradient(func, current_point, 1e-6),
239  search_direction);
240 
241  int iteration = 0;
242  while (step_size > minStepSize && iteration < maxIterations) {
243  updated_point =
244  update_point(current_point, search_direction, step_size);
245  double f_updated = func(updated_point);
246  if (f_updated <=
247  f_current + alpha * step_size * directional_derivative) {
248  break; // Stop if Armijo condition is satisfied
249  }
250  step_size *= beta; // Reduce the step size
251  ++iteration;
252  }
253 
254  return step_size;
255 }
256 
258  const std::vector<double> &current_point,
259  const std::vector<double> &search_direction,
260  double step_size) {
261  size_t n = current_point.size();
262  std::vector<double> updated_point(n);
263 
264  for (size_t i = 0; i < n; ++i) {
265  updated_point[i] = current_point[i] + step_size * search_direction[i];
266  }
267 
268  return updated_point;
269 }
270 
272  const std::vector<double> &next_point,
273  const std::vector<double> &current_point,
274  const std::vector<double> &gradient) {
275  size_t n = next_point.size();
276  std::vector<double> gradient_difference(n);
277 
278  for (size_t i = 0; i < n; ++i) {
279  gradient_difference[i] =
280  gradient[i] * (next_point[i] - current_point[i]);
281  }
282 
283  return gradient_difference;
284 }
285 
286 std::vector<std::vector<double>>
288  const std::vector<std::vector<double>> &hessian_inverse,
289  const std::vector<double> &gradient_difference,
290  const std::vector<double> &search_direction) {
291 
292  size_t n = hessian_inverse.size();
293  std::vector<std::vector<double>> updated_hessian_inverse(
294  n,
295  std::vector<double>(n));
296 
297  // Update Hessian using BFGS update formula
298  double rho = dot_product(gradient_difference, search_direction);
299 
300  for (size_t i = 0; i < n; ++i) {
301  for (size_t j = 0; j < n; ++j) {
302  updated_hessian_inverse[i][j] =
303  hessian_inverse[i][j] +
304  rho * gradient_difference[i] * gradient_difference[j];
305  }
306  }
307 
308  return updated_hessian_inverse;
309 }
310 
311 double gpmp::optim::QuasiNewton::dot_product(const std::vector<double> &a,
312  const std::vector<double> &b) {
313  // Ensure vectors have the same size
314  if (a.size() != b.size()) {
315  throw std::invalid_argument(
316  "Vectors must have the same size for dot product.");
317  }
318 
319  double result = 0.0;
320  for (size_t i = 0; i < a.size(); ++i) {
321  result += a[i] * b[i];
322  }
323  return result;
324 }
325 
326 std::tuple<std::vector<double>, double>
328  const std::function<double(const std::vector<double> &)> &f,
329  const std::vector<double> &initial_point,
330  double tolerance,
331  size_t max_iterations,
332  size_t memory_size) {
333 
334  const double eps = 1e-8;
335 
336  size_t n = initial_point.size();
337  std::vector<double> x = initial_point;
338  std::vector<double> g(n); // Gradient vector
339  std::vector<std::vector<double>> s(memory_size,
340  std::vector<double>(n)); // s vectors
341  std::vector<std::vector<double>> y(memory_size,
342  std::vector<double>(n)); // y vectors
343  std::vector<double> rho(memory_size); // rho values
344 
345  // Evaluate the objective function and gradient at initial_point
346  double fx = f(x);
347  // Calculate gradient at initial_point
348  // Gradient calculation logic to be implemented
349  // Assign gradient to 'g'
350 
351  for (size_t iter = 0; iter < max_iterations; ++iter) {
352  // Check for convergence
353  double norm_grad = 0.0;
354  for (size_t i = 0; i < n; ++i) {
355  norm_grad += g[i] * g[i];
356  }
357  norm_grad = sqrt(norm_grad);
358  if (norm_grad < tolerance) {
359  break;
360  }
361 
362  // Compute search direction (use initial guess)
363  std::vector<double> d = g;
364 
365  // L-BFGS two-loop recursion
366  size_t start = std::min(iter, memory_size);
367  // for (size_t i = start - 1; i >= 0; --i) {
368  for (size_t i = start; i > 0; --i) {
369 
370  rho[i] = 1.0 /
371  inner_product(s[i].begin(), s[i].end(), y[i].begin(), 0.0);
372  double alpha =
373  rho[i] *
374  inner_product(s[i].begin(), s[i].end(), d.begin(), 0.0);
375  for (size_t j = 0; j < n; ++j) {
376  d[j] -= alpha * y[i][j];
377  }
378  }
379 
380  // Perform scaling
381  for (size_t i = 0; i < n; ++i) {
382  d[i] *= rho[i];
383  }
384 
385  // Compute gradient of the objective function along the search direction
386  // Gradient calculation logic to be implemented
387  // Assign gradient to 'dg'
388  double dg = inner_product(d.begin(), d.end(), g.begin(), 0.0);
389 
390  // Limit curvature
391  if (dg > 0) {
392  break;
393  }
394 
395  // Line search
396  double step_size = 1.0;
397  std::vector<double> x_new = x;
398  for (size_t i = 0; i < n; ++i) {
399  x_new[i] += step_size * d[i];
400  }
401 
402  double fx_new = f(x_new);
403  if (fx_new < fx + eps * step_size * dg) {
404  // Update x
405  x = x_new;
406  fx = fx_new;
407 
408  // Evaluate gradient at new point
409  // Gradient calculation logic to be implemented
410  // Assign gradient to 'g'
411 
412  // Update s and y
413  for (size_t i = 0; i < n; ++i) {
414  s[iter % memory_size][i] = x_new[i] - x[i];
415  y[iter % memory_size][i] = g[i] - d[i];
416  }
417  }
418  }
419 
420  return std::make_tuple(x, fx);
421 }
double line_search(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &current_point, const std::vector< double > &search_direction)
Perform line search to find an appropriate step size.
Definition: quasi.cpp:221
std::vector< double > bhhh_optimize(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
Optimize a function using the Berndt–Hall–Hall–Hausman (BHHH) algorithm.
Definition: quasi.cpp:58
std::vector< double > calculate_search_direction(const std::vector< double > &gradient, const std::vector< std::vector< double >> &hessian_inverse)
Calculate the search direction using the BFGS method.
Definition: quasi.cpp:205
std::vector< std::vector< double > > calculate_bhhh_matrix(const std::vector< double > &gradient)
Calculate the BHHH matrix from the gradient.
Definition: quasi.cpp:117
std::vector< double > calculate_gradient_difference(const std::vector< double > &next_point, const std::vector< double > &current_point, const std::vector< double > &gradient)
Calculate the gradient difference between two points.
Definition: quasi.cpp:271
double dot_product(const std::vector< double > &a, const std::vector< double > &b)
Calculate the dot product of two vectors.
Definition: quasi.cpp:311
std::vector< double > calculate_gradient(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &point, double epsilon)
Calculate the gradient of a function at a given point.
Definition: quasi.cpp:96
std::vector< double > update_point(const std::vector< double > &current_point, const std::vector< double > &gradient, const std::vector< std::vector< double >> &bhhh_matrix)
Update the current point using the BHHH matrix.
Definition: quasi.cpp:131
std::tuple< std::vector< double >, double > lbfgs_optimize(const std::function< double(const std::vector< double > &)> &f, const std::vector< double > &initial_point, double tolerance=1e-4, size_t max_iterations=100, size_t memory_size=5)
Optimize a function using the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm.
Definition: quasi.cpp:327
std::vector< double > bfgs_optimize(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
Optimize a function using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
Definition: quasi.cpp:145
std::vector< std::vector< double > > update_hessian_inverse(const std::vector< std::vector< double >> &hessian_inverse, const std::vector< double > &gradient_difference, const std::vector< double > &search_direction)
Update the inverse of the Hessian matrix using the BFGS method.
Definition: quasi.cpp:287
std::vector< double > vector_subtraction(const std::vector< double > &a, const std::vector< double > &b) const
Subtract two vectors element-wise.
Definition: quasi.cpp:40
int dot_product(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2)
Computes the dot product for vectors of signed 8-bit integers.