openGPMP
Open Source Mathematics Package
Public Member Functions | List of all members
gpmp::optim::QuasiNewton Class Reference

Class implementing Quasi-Newton optimization methods. More...

#include <quasi.hpp>

Public Member Functions

std::vector< double > bhhh_optimize (const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
 Optimize a function using the Berndt–Hall–Hall–Hausman (BHHH) algorithm. More...
 
std::vector< double > calculate_gradient (const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &point, double epsilon)
 Calculate the gradient of a function at a given point. More...
 
std::vector< std::vector< double > > calculate_bhhh_matrix (const std::vector< double > &gradient)
 Calculate the BHHH matrix from the gradient. More...
 
std::vector< double > update_point (const std::vector< double > &current_point, const std::vector< double > &gradient, const std::vector< std::vector< double >> &bhhh_matrix)
 Update the current point using the BHHH matrix. More...
 
std::vector< double > bfgs_optimize (const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
 Optimize a function using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm. More...
 
std::vector< double > calculate_search_direction (const std::vector< double > &gradient, const std::vector< std::vector< double >> &hessian_inverse)
 Calculate the search direction using the BFGS method. More...
 
double line_search (const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &current_point, const std::vector< double > &search_direction)
 Perform line search to find an appropriate step size. More...
 
std::vector< double > update_point (const std::vector< double > &current_point, const std::vector< double > &search_direction, double step_size)
 Update the current point using the line search and step size. More...
 
std::vector< double > calculate_gradient_difference (const std::vector< double > &next_point, const std::vector< double > &current_point, const std::vector< double > &gradient)
 Calculate the gradient difference between two points. More...
 
std::vector< std::vector< double > > update_hessian_inverse (const std::vector< std::vector< double >> &hessian_inverse, const std::vector< double > &gradient_difference, const std::vector< double > &search_direction)
 Update the inverse of the Hessian matrix using the BFGS method. More...
 
double dot_product (const std::vector< double > &a, const std::vector< double > &b)
 Calculate the dot product of two vectors. More...
 
std::vector< double > vector_subtraction (const std::vector< double > &a, const std::vector< double > &b) const
 Subtract two vectors element-wise. More...
 
std::tuple< std::vector< double >, double > lbfgs_optimize (const std::function< double(const std::vector< double > &)> &f, const std::vector< double > &initial_point, double tolerance=1e-4, size_t max_iterations=100, size_t memory_size=5)
 Optimize a function using the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm. More...
 

Detailed Description

Class implementing Quasi-Newton optimization methods.

Definition at line 48 of file quasi.hpp.

Member Function Documentation

◆ bfgs_optimize()

std::vector< double > gpmp::optim::QuasiNewton::bfgs_optimize ( const std::function< double(const std::vector< double > &)> &  func,
const std::vector< double > &  initial_point,
double  tolerance,
size_t  max_iterations 
)

Optimize a function using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.

Parameters
funcThe objective function to minimize
initial_pointThe initial guess for the optimal parameters
toleranceThe tolerance for stopping criterion
max_iterationsThe maximum number of iterations
Returns
The vector of optimal parameters

Definition at line 145 of file quasi.cpp.

149  {
150  std::vector<double> current_point = initial_point;
151  size_t n = initial_point.size();
152 
153  // Initialize Hessian approximation as the identity matrix
154  std::vector<std::vector<double>> hessian_inverse(
155  n,
156  std::vector<double>(n, 0.0));
157  for (size_t i = 0; i < n; ++i) {
158  hessian_inverse[i][i] = 1.0;
159  }
160 
161  for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
162  // Calculate the gradient
163  std::vector<double> gradient =
164  calculate_gradient(func, current_point, 1e-6);
165 
166  // Check convergence
167  double gradient_norm = 0.0;
168  for (size_t i = 0; i < n; ++i) {
169  gradient_norm += gradient[i] * gradient[i];
170  }
171  gradient_norm = std::sqrt(gradient_norm);
172 
173  if (gradient_norm < tolerance) {
174  std::cout << "Converged after " << iteration << " iterations."
175  << std::endl;
176  return current_point;
177  }
178 
179  // Calculate search direction
180  std::vector<double> search_direction =
181  calculate_search_direction(gradient, hessian_inverse);
182 
183  // Perform line search to find the step size
184  double step_size = line_search(func, current_point, search_direction);
185 
186  // Update the current point
187  std::vector<double> next_point =
188  update_point(current_point, search_direction, step_size);
189 
190  // Update the Hessian approximation
191  std::vector<double> gradient_difference =
192  calculate_gradient_difference(next_point, current_point, gradient);
193  hessian_inverse = update_hessian_inverse(hessian_inverse,
194  gradient_difference,
195  search_direction);
196 
197  // Move to the next iteration
198  current_point = next_point;
199  }
200 
201  std::cout << "Reached maximum iterations without convergence." << std::endl;
202  return current_point;
203 }
double line_search(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &current_point, const std::vector< double > &search_direction)
Perform line search to find an appropriate step size.
Definition: quasi.cpp:221
std::vector< double > calculate_search_direction(const std::vector< double > &gradient, const std::vector< std::vector< double >> &hessian_inverse)
Calculate the search direction using the BFGS method.
Definition: quasi.cpp:205
std::vector< double > calculate_gradient_difference(const std::vector< double > &next_point, const std::vector< double > &current_point, const std::vector< double > &gradient)
Calculate the gradient difference between two points.
Definition: quasi.cpp:271
std::vector< double > calculate_gradient(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &point, double epsilon)
Calculate the gradient of a function at a given point.
Definition: quasi.cpp:96
std::vector< double > update_point(const std::vector< double > &current_point, const std::vector< double > &gradient, const std::vector< std::vector< double >> &bhhh_matrix)
Update the current point using the BHHH matrix.
Definition: quasi.cpp:131
std::vector< std::vector< double > > update_hessian_inverse(const std::vector< std::vector< double >> &hessian_inverse, const std::vector< double > &gradient_difference, const std::vector< double > &search_direction)
Update the inverse of the Hessian matrix using the BFGS method.
Definition: quasi.cpp:287

◆ bhhh_optimize()

std::vector< double > gpmp::optim::QuasiNewton::bhhh_optimize ( const std::function< double(const std::vector< double > &)> &  func,
const std::vector< double > &  initial_point,
double  tolerance,
size_t  max_iterations 
)

Optimize a function using the Berndt–Hall–Hall–Hausman (BHHH) algorithm.

Parameters
funcThe objective function to minimize
initial_pointThe initial guess for the optimal parameters
toleranceThe tolerance for stopping criterion
max_iterationsThe maximum number of iterations
Returns
The vector of optimal parameters

Definition at line 58 of file quasi.cpp.

62  {
63  std::vector<double> current_point = initial_point;
64  size_t n = initial_point.size();
65 
66  for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
67  // Calculate the gradient
68  std::vector<double> gradient =
69  calculate_gradient(func, current_point, 1e-6);
70 
71  // Check convergence
72  double gradient_norm = 0.0;
73  for (size_t i = 0; i < n; ++i) {
74  gradient_norm += gradient[i] * gradient[i];
75  }
76  gradient_norm = std::sqrt(gradient_norm);
77 
78  if (gradient_norm < tolerance) {
79  std::cout << "Converged after " << iteration << " iterations."
80  << std::endl;
81  return current_point;
82  }
83 
84  // Calculate the BHHH matrix
85  std::vector<std::vector<double>> bhhh_matrix =
86  calculate_bhhh_matrix(gradient);
87 
88  // Update the current point
89  current_point = update_point(current_point, gradient, bhhh_matrix);
90  }
91 
92  std::cout << "Reached maximum iterations without convergence." << std::endl;
93  return current_point;
94 }
std::vector< std::vector< double > > calculate_bhhh_matrix(const std::vector< double > &gradient)
Calculate the BHHH matrix from the gradient.
Definition: quasi.cpp:117

◆ calculate_bhhh_matrix()

std::vector< std::vector< double > > gpmp::optim::QuasiNewton::calculate_bhhh_matrix ( const std::vector< double > &  gradient)

Calculate the BHHH matrix from the gradient.

Parameters
gradientThe gradient vector
Returns
The BHHH matrix

Definition at line 117 of file quasi.cpp.

118  {
119  size_t n = gradient.size();
120  std::vector<std::vector<double>> bhhh_matrix(n, std::vector<double>(n));
121 
122  for (size_t i = 0; i < n; ++i) {
123  for (size_t j = 0; j < n; ++j) {
124  bhhh_matrix[i][j] = gradient[i] * gradient[j];
125  }
126  }
127 
128  return bhhh_matrix;
129 }

◆ calculate_gradient()

std::vector< double > gpmp::optim::QuasiNewton::calculate_gradient ( const std::function< double(const std::vector< double > &)> &  func,
const std::vector< double > &  point,
double  epsilon 
)

Calculate the gradient of a function at a given point.

Parameters
funcThe objective function
pointThe point at which to calculate the gradient
epsilonThe perturbation for finite differences
Returns
The gradient vector

Definition at line 96 of file quasi.cpp.

99  {
100  size_t n = point.size();
101  std::vector<double> gradient(n);
102 
103  for (size_t i = 0; i < n; ++i) {
104  std::vector<double> perturbed_point = point;
105  perturbed_point[i] += epsilon;
106 
107  double perturbed_value = func(perturbed_point);
108  double original_value = func(point);
109 
110  gradient[i] = (perturbed_value - original_value) / epsilon;
111  }
112 
113  return gradient;
114 }

◆ calculate_gradient_difference()

std::vector< double > gpmp::optim::QuasiNewton::calculate_gradient_difference ( const std::vector< double > &  next_point,
const std::vector< double > &  current_point,
const std::vector< double > &  gradient 
)

Calculate the gradient difference between two points.

Parameters
next_pointThe next point
current_pointThe current point
gradientThe gradient vector
Returns
The gradient difference vector

Definition at line 271 of file quasi.cpp.

274  {
275  size_t n = next_point.size();
276  std::vector<double> gradient_difference(n);
277 
278  for (size_t i = 0; i < n; ++i) {
279  gradient_difference[i] =
280  gradient[i] * (next_point[i] - current_point[i]);
281  }
282 
283  return gradient_difference;
284 }

◆ calculate_search_direction()

std::vector< double > gpmp::optim::QuasiNewton::calculate_search_direction ( const std::vector< double > &  gradient,
const std::vector< std::vector< double >> &  hessian_inverse 
)

Calculate the search direction using the BFGS method.

Parameters
gradientThe gradient vector
hessian_inverseThe inverse of the Hessian matrix
Returns
The search direction vector

Definition at line 205 of file quasi.cpp.

207  {
208  size_t n = gradient.size();
209  std::vector<double> search_direction(n);
210 
211  for (size_t i = 0; i < n; ++i) {
212  search_direction[i] = 0.0;
213  for (size_t j = 0; j < n; ++j) {
214  search_direction[i] -= hessian_inverse[i][j] * gradient[j];
215  }
216  }
217 
218  return search_direction;
219 }

◆ dot_product()

double gpmp::optim::QuasiNewton::dot_product ( const std::vector< double > &  a,
const std::vector< double > &  b 
)

Calculate the dot product of two vectors.

Parameters
aThe first vector
bThe second vector
Returns
The dot product value

Definition at line 311 of file quasi.cpp.

312  {
313  // Ensure vectors have the same size
314  if (a.size() != b.size()) {
315  throw std::invalid_argument(
316  "Vectors must have the same size for dot product.");
317  }
318 
319  double result = 0.0;
320  for (size_t i = 0; i < a.size(); ++i) {
321  result += a[i] * b[i];
322  }
323  return result;
324 }

◆ lbfgs_optimize()

std::tuple< std::vector< double >, double > gpmp::optim::QuasiNewton::lbfgs_optimize ( const std::function< double(const std::vector< double > &)> &  f,
const std::vector< double > &  initial_point,
double  tolerance = 1e-4,
size_t  max_iterations = 100,
size_t  memory_size = 5 
)

Optimize a function using the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm.

Parameters
funcThe objective function to optimize
initial_pointThe initial guess for the optimal point
toleranceThe tolerance for stopping criterion
max_iterationsThe maximum number of iterations
memory_sizeThe size of the limited-memory history (s and y vectors)
Returns
The optimized point that minimizes the given objective function

Definition at line 327 of file quasi.cpp.

332  {
333 
334  const double eps = 1e-8;
335 
336  size_t n = initial_point.size();
337  std::vector<double> x = initial_point;
338  std::vector<double> g(n); // Gradient vector
339  std::vector<std::vector<double>> s(memory_size,
340  std::vector<double>(n)); // s vectors
341  std::vector<std::vector<double>> y(memory_size,
342  std::vector<double>(n)); // y vectors
343  std::vector<double> rho(memory_size); // rho values
344 
345  // Evaluate the objective function and gradient at initial_point
346  double fx = f(x);
347  // Calculate gradient at initial_point
348  // Gradient calculation logic to be implemented
349  // Assign gradient to 'g'
350 
351  for (size_t iter = 0; iter < max_iterations; ++iter) {
352  // Check for convergence
353  double norm_grad = 0.0;
354  for (size_t i = 0; i < n; ++i) {
355  norm_grad += g[i] * g[i];
356  }
357  norm_grad = sqrt(norm_grad);
358  if (norm_grad < tolerance) {
359  break;
360  }
361 
362  // Compute search direction (use initial guess)
363  std::vector<double> d = g;
364 
365  // L-BFGS two-loop recursion
366  size_t start = std::min(iter, memory_size);
367  // for (size_t i = start - 1; i >= 0; --i) {
368  for (size_t i = start; i > 0; --i) {
369 
370  rho[i] = 1.0 /
371  inner_product(s[i].begin(), s[i].end(), y[i].begin(), 0.0);
372  double alpha =
373  rho[i] *
374  inner_product(s[i].begin(), s[i].end(), d.begin(), 0.0);
375  for (size_t j = 0; j < n; ++j) {
376  d[j] -= alpha * y[i][j];
377  }
378  }
379 
380  // Perform scaling
381  for (size_t i = 0; i < n; ++i) {
382  d[i] *= rho[i];
383  }
384 
385  // Compute gradient of the objective function along the search direction
386  // Gradient calculation logic to be implemented
387  // Assign gradient to 'dg'
388  double dg = inner_product(d.begin(), d.end(), g.begin(), 0.0);
389 
390  // Limit curvature
391  if (dg > 0) {
392  break;
393  }
394 
395  // Line search
396  double step_size = 1.0;
397  std::vector<double> x_new = x;
398  for (size_t i = 0; i < n; ++i) {
399  x_new[i] += step_size * d[i];
400  }
401 
402  double fx_new = f(x_new);
403  if (fx_new < fx + eps * step_size * dg) {
404  // Update x
405  x = x_new;
406  fx = fx_new;
407 
408  // Evaluate gradient at new point
409  // Gradient calculation logic to be implemented
410  // Assign gradient to 'g'
411 
412  // Update s and y
413  for (size_t i = 0; i < n; ++i) {
414  s[iter % memory_size][i] = x_new[i] - x[i];
415  y[iter % memory_size][i] = g[i] - d[i];
416  }
417  }
418  }
419 
420  return std::make_tuple(x, fx);
421 }

◆ line_search()

double gpmp::optim::QuasiNewton::line_search ( const std::function< double(const std::vector< double > &)> &  func,
const std::vector< double > &  current_point,
const std::vector< double > &  search_direction 
)

Perform line search to find an appropriate step size.

Parameters
funcThe objective function
current_pointThe current point
search_directionThe search direction vector
Returns
The optimal step size

Definition at line 221 of file quasi.cpp.

224  {
225  const double alpha = 0.001; // Step size multiplier
226  const double beta = 0.5; // Factor for reducing the step size
227  const int maxIterations = 100; // Maximum number of iterations
228  const double minStepSize = 1e-6; // Minimum step size
229 
230  double step_size = 1.0; // Initial step size
231  std::vector<double> updated_point = current_point;
232 
233  // Evaluate the objective function at the current point
234  double f_current = func(current_point);
235 
236  // Calculate the directional derivative (gradient dot search_direction)
237  double directional_derivative =
238  dot_product(calculate_gradient(func, current_point, 1e-6),
239  search_direction);
240 
241  int iteration = 0;
242  while (step_size > minStepSize && iteration < maxIterations) {
243  updated_point =
244  update_point(current_point, search_direction, step_size);
245  double f_updated = func(updated_point);
246  if (f_updated <=
247  f_current + alpha * step_size * directional_derivative) {
248  break; // Stop if Armijo condition is satisfied
249  }
250  step_size *= beta; // Reduce the step size
251  ++iteration;
252  }
253 
254  return step_size;
255 }
double dot_product(const std::vector< double > &a, const std::vector< double > &b)
Calculate the dot product of two vectors.
Definition: quasi.cpp:311

References gpmp::linalg::dot_product().

◆ update_hessian_inverse()

std::vector< std::vector< double > > gpmp::optim::QuasiNewton::update_hessian_inverse ( const std::vector< std::vector< double >> &  hessian_inverse,
const std::vector< double > &  gradient_difference,
const std::vector< double > &  search_direction 
)

Update the inverse of the Hessian matrix using the BFGS method.

Parameters
hessian_inverseThe current inverse of the Hessian matrix
gradient_differenceThe gradient difference vector
search_directionThe search direction vector
Returns
The updated inverse of the Hessian matrix

Definition at line 287 of file quasi.cpp.

290  {
291 
292  size_t n = hessian_inverse.size();
293  std::vector<std::vector<double>> updated_hessian_inverse(
294  n,
295  std::vector<double>(n));
296 
297  // Update Hessian using BFGS update formula
298  double rho = dot_product(gradient_difference, search_direction);
299 
300  for (size_t i = 0; i < n; ++i) {
301  for (size_t j = 0; j < n; ++j) {
302  updated_hessian_inverse[i][j] =
303  hessian_inverse[i][j] +
304  rho * gradient_difference[i] * gradient_difference[j];
305  }
306  }
307 
308  return updated_hessian_inverse;
309 }

References gpmp::linalg::dot_product().

◆ update_point() [1/2]

std::vector< double > gpmp::optim::QuasiNewton::update_point ( const std::vector< double > &  current_point,
const std::vector< double > &  gradient,
const std::vector< std::vector< double >> &  bhhh_matrix 
)

Update the current point using the BHHH matrix.

Parameters
current_pointThe current point
gradientThe gradient vector
bhhh_matrixThe BHHH matrix
Returns
The updated point

Definition at line 131 of file quasi.cpp.

134  {
135  size_t n = current_point.size();
136  std::vector<double> updated_point(n);
137 
138  for (size_t i = 0; i < n; ++i) {
139  updated_point[i] = current_point[i] - gradient[i] / bhhh_matrix[i][i];
140  }
141 
142  return updated_point;
143 }

◆ update_point() [2/2]

std::vector< double > gpmp::optim::QuasiNewton::update_point ( const std::vector< double > &  current_point,
const std::vector< double > &  search_direction,
double  step_size 
)

Update the current point using the line search and step size.

Parameters
current_pointThe current point
search_directionThe search direction vector
step_sizeThe step size from the line search
Returns
The updated point

Definition at line 257 of file quasi.cpp.

260  {
261  size_t n = current_point.size();
262  std::vector<double> updated_point(n);
263 
264  for (size_t i = 0; i < n; ++i) {
265  updated_point[i] = current_point[i] + step_size * search_direction[i];
266  }
267 
268  return updated_point;
269 }

◆ vector_subtraction()

std::vector< double > gpmp::optim::QuasiNewton::vector_subtraction ( const std::vector< double > &  a,
const std::vector< double > &  b 
) const

Subtract two vectors element-wise.

Parameters
aThe first vector
bThe second vector
Returns
The result of subtracting each element of vector b from vector a

Definition at line 40 of file quasi.cpp.

42  {
43  if (a.size() != b.size()) {
44  throw std::invalid_argument(
45  "Error: Vector dimensions do not match for subtraction.");
46  }
47 
48  std::vector<double> result;
49  result.reserve(a.size());
50 
51  for (size_t i = 0; i < a.size(); ++i) {
52  result.push_back(a[i] - b[i]);
53  }
54 
55  return result;
56 }

The documentation for this class was generated from the following files: