/*************************************************************************
 *
 * Project
 *                          _____ _____  __  __ _____
 *                         / ____|  __ \|  \/  |  __ \
 *    ___  _ __   ___ _ __| |  __| |__) | \  / | |__) |
 *   / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *  | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *   \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *        | |
 *        |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms,
 * among other details, are described in the official documentation at
 * https://akielaries.github.io/openGPMP/ and in the relevant files of
 * this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute,
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <algorithm>
#include <cmath>
#include <functional>
#include <iostream>
#include <numeric>
#include <openGPMP/optim/quasi.hpp>
#include <stdexcept>
#include <tuple>
#include <vector>

std::vector<double> gpmp::optim::QuasiNewton::vector_subtraction(
    const std::vector<double> &a,
    const std::vector<double> &b) const {
    if (a.size() != b.size()) {
        throw std::invalid_argument(
            "Error: Vector dimensions do not match for subtraction.");
    }

    std::vector<double> result;
    result.reserve(a.size());

    for (size_t i = 0; i < a.size(); ++i) {
        result.push_back(a[i] - b[i]);
    }

    return result;
}

std::vector<double> gpmp::optim::QuasiNewton::bhhh_optimize(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations) {
    std::vector<double> current_point = initial_point;
    size_t n = initial_point.size();

    for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
        // Calculate the gradient
        std::vector<double> gradient =
            calculate_gradient(func, current_point, 1e-6);

        // Check convergence against the Euclidean norm of the gradient
        double gradient_norm = 0.0;
        for (size_t i = 0; i < n; ++i) {
            gradient_norm += gradient[i] * gradient[i];
        }
        gradient_norm = std::sqrt(gradient_norm);

        if (gradient_norm < tolerance) {
            std::cout << "Converged after " << iteration << " iterations."
                      << std::endl;
            return current_point;
        }

        // Calculate the BHHH matrix
        std::vector<std::vector<double>> bhhh_matrix =
            calculate_bhhh_matrix(gradient);

        // Update the current point
        current_point = update_point(current_point, gradient, bhhh_matrix);
    }

    std::cout << "Reached maximum iterations without convergence." << std::endl;
    return current_point;
}
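
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp). BHHH is
 * typically applied to log-likelihood-style objectives; a simple
 * quadratic is used here only to show the call signature:
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto objective = [](const std::vector<double> &v) {
 *       return (v[0] - 2.0) * (v[0] - 2.0) + (v[1] - 3.0) * (v[1] - 3.0);
 *   };
 *   std::vector<double> start = {0.0, 0.0};
 *   std::vector<double> result =
 *       qn.bhhh_optimize(objective, start, 1e-6, 1000);
 */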

std::vector<double> gpmp::optim::QuasiNewton::calculate_gradient(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &point,
    double epsilon) {
    size_t n = point.size();
    std::vector<double> gradient(n);

    // Evaluate the objective once at the base point; only the perturbed
    // evaluation changes per coordinate
    double original_value = func(point);

    for (size_t i = 0; i < n; ++i) {
        std::vector<double> perturbed_point = point;
        perturbed_point[i] += epsilon;

        double perturbed_value = func(perturbed_point);

        // Forward-difference approximation of the i-th partial derivative
        gradient[i] = (perturbed_value - original_value) / epsilon;
    }

    return gradient;
}
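
/* A quick numeric check of the forward difference above (illustrative):
 * for f(x) = x^2 at x = 1 with epsilon = 1e-6,
 *
 *   ((1 + 1e-6)^2 - 1) / 1e-6 = 2 + 1e-6 = 2.000001,
 *
 * versus the exact derivative 2. The truncation error is O(epsilon); a
 * central difference, (f(x + eps) - f(x - eps)) / (2 * eps), would reduce
 * it to O(epsilon^2) at the cost of one extra evaluation per coordinate.
 */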

std::vector<std::vector<double>>
gpmp::optim::QuasiNewton::calculate_bhhh_matrix(
    const std::vector<double> &gradient) {
    size_t n = gradient.size();
    std::vector<std::vector<double>> bhhh_matrix(n, std::vector<double>(n));

    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < n; ++j) {
            bhhh_matrix[i][j] = gradient[i] * gradient[j];
        }
    }

    return bhhh_matrix;
}
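
/* The matrix built above is the outer product g * g^T. For example
 * (illustrative), with gradient g = (1, 2):
 *
 *   B = | 1*1  1*2 |  =  | 1  2 |
 *       | 2*1  2*2 |     | 2  4 |
 *
 * In the BHHH method proper, this outer product is summed over
 * per-observation score vectors to approximate the Hessian of a
 * log-likelihood; here a single gradient vector is used.
 */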

std::vector<double> gpmp::optim::QuasiNewton::update_point(
    const std::vector<double> &current_point,
    const std::vector<double> &gradient,
    const std::vector<std::vector<double>> &bhhh_matrix) {
    size_t n = current_point.size();
    std::vector<double> updated_point(n);

    for (size_t i = 0; i < n; ++i) {
        // Diagonal-only Newton-style step; skip coordinates where the
        // diagonal entry vanishes (gradient[i] == 0) to avoid 0/0
        if (bhhh_matrix[i][i] != 0.0) {
            updated_point[i] =
                current_point[i] - gradient[i] / bhhh_matrix[i][i];
        } else {
            updated_point[i] = current_point[i];
        }
    }

    return updated_point;
}

std::vector<double> gpmp::optim::QuasiNewton::bfgs_optimize(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations) {
    std::vector<double> current_point = initial_point;
    size_t n = initial_point.size();

    // Initialize the inverse Hessian approximation as the identity matrix
    std::vector<std::vector<double>> hessian_inverse(
        n,
        std::vector<double>(n, 0.0));
    for (size_t i = 0; i < n; ++i) {
        hessian_inverse[i][i] = 1.0;
    }

    for (size_t iteration = 0; iteration < max_iterations; ++iteration) {
        // Calculate the gradient
        std::vector<double> gradient =
            calculate_gradient(func, current_point, 1e-6);

        // Check convergence
        double gradient_norm = 0.0;
        for (size_t i = 0; i < n; ++i) {
            gradient_norm += gradient[i] * gradient[i];
        }
        gradient_norm = std::sqrt(gradient_norm);

        if (gradient_norm < tolerance) {
            std::cout << "Converged after " << iteration << " iterations."
                      << std::endl;
            return current_point;
        }

        // Calculate the search direction
        std::vector<double> search_direction =
            calculate_search_direction(gradient, hessian_inverse);

        // Perform a line search to find the step size
        double step_size = line_search(func, current_point, search_direction);

        // Update the current point
        std::vector<double> next_point =
            update_point(current_point, search_direction, step_size);

        // Update the inverse Hessian approximation
        std::vector<double> gradient_difference =
            calculate_gradient_difference(next_point, current_point, gradient);
        hessian_inverse = update_hessian_inverse(hessian_inverse,
                                                 gradient_difference,
                                                 search_direction);

        // Move to the next iteration
        current_point = next_point;
    }

    std::cout << "Reached maximum iterations without convergence." << std::endl;
    return current_point;
}
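
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp): minimizing
 * the Rosenbrock function from a standard starting point.
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto rosenbrock = [](const std::vector<double> &v) {
 *       double a = 1.0 - v[0];
 *       double b = v[1] - v[0] * v[0];
 *       return a * a + 100.0 * b * b;
 *   };
 *   std::vector<double> x0 = {-1.2, 1.0};
 *   std::vector<double> xmin =
 *       qn.bfgs_optimize(rosenbrock, x0, 1e-6, 10000);
 *   // xmin should approach the global minimum at {1.0, 1.0}
 */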

std::vector<double> gpmp::optim::QuasiNewton::calculate_search_direction(
    const std::vector<double> &gradient,
    const std::vector<std::vector<double>> &hessian_inverse) {
    size_t n = gradient.size();
    std::vector<double> search_direction(n);

    for (size_t i = 0; i < n; ++i) {
        search_direction[i] = 0.0;
        for (size_t j = 0; j < n; ++j) {
            search_direction[i] -= hessian_inverse[i][j] * gradient[j];
        }
    }

    return search_direction;
}
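
/* The loop above computes the quasi-Newton direction
 *
 *   d = -H_inv * g,
 *
 * i.e. a matrix-vector product followed by negation. With H_inv equal to
 * the identity (the initialization in bfgs_optimize), d reduces to the
 * steepest-descent direction -g.
 */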

double gpmp::optim::QuasiNewton::line_search(
    const std::function<double(const std::vector<double> &)> &func,
    const std::vector<double> &current_point,
    const std::vector<double> &search_direction) {
    const double alpha = 0.001;        // Armijo sufficient-decrease constant
    const double beta = 0.5;           // Factor for reducing the step size
    const int max_iterations = 100;    // Maximum number of backtracking steps
    const double min_step_size = 1e-6; // Minimum step size

    double step_size = 1.0; // Initial step size
    std::vector<double> updated_point = current_point;

    // Evaluate the objective function at the current point
    double f_current = func(current_point);

    // Calculate the directional derivative (gradient dot search_direction)
    double directional_derivative =
        dot_product(calculate_gradient(func, current_point, 1e-6),
                    search_direction);

    int iteration = 0;
    while (step_size > min_step_size && iteration < max_iterations) {
        updated_point =
            update_point(current_point, search_direction, step_size);
        double f_updated = func(updated_point);
        if (f_updated <=
            f_current + alpha * step_size * directional_derivative) {
            break; // Stop if the Armijo condition is satisfied
        }
        step_size *= beta; // Reduce the step size
        ++iteration;
    }

    return step_size;
}
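
/* Backtracking sketch (illustrative): the Armijo condition accepts the
 * first step t in {1, 0.5, 0.25, ...} satisfying
 *
 *   f(x + t * d) <= f(x) + alpha * t * (grad f(x) . d).
 *
 * For f(x) = x^2 at x = 1 with d = -2 (steepest descent), f(x) = 1 and
 * grad f . d = -4, so t = 1 gives f(-1) = 1 > 1 - 0.001 * 4 = 0.996 and
 * is rejected, while t = 0.5 gives f(0) = 0 <= 0.998 and is accepted.
 */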

std::vector<double> gpmp::optim::QuasiNewton::update_point(
    const std::vector<double> &current_point,
    const std::vector<double> &search_direction,
    double step_size) {
    size_t n = current_point.size();
    std::vector<double> updated_point(n);

    for (size_t i = 0; i < n; ++i) {
        updated_point[i] = current_point[i] + step_size * search_direction[i];
    }

    return updated_point;
}

std::vector<double> gpmp::optim::QuasiNewton::calculate_gradient_difference(
    const std::vector<double> &next_point,
    const std::vector<double> &current_point,
    const std::vector<double> &gradient) {
    size_t n = next_point.size();
    std::vector<double> gradient_difference(n);

    // Note: this scales the current gradient by the step taken rather than
    // re-evaluating the gradient at next_point, so it only approximates the
    // true difference y_k = grad f(x_{k+1}) - grad f(x_k)
    for (size_t i = 0; i < n; ++i) {
        gradient_difference[i] =
            gradient[i] * (next_point[i] - current_point[i]);
    }

    return gradient_difference;
}

std::vector<std::vector<double>>
gpmp::optim::QuasiNewton::update_hessian_inverse(
    const std::vector<std::vector<double>> &hessian_inverse,
    const std::vector<double> &gradient_difference,
    const std::vector<double> &search_direction) {

    size_t n = hessian_inverse.size();
    std::vector<std::vector<double>> updated_hessian_inverse(
        n,
        std::vector<double>(n));

    // Simplified rank-one correction scaled by (y . d); note this is not
    // the full BFGS inverse update (see the reference sketch below)
    double rho = dot_product(gradient_difference, search_direction);

    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < n; ++j) {
            updated_hessian_inverse[i][j] =
                hessian_inverse[i][j] +
                rho * gradient_difference[i] * gradient_difference[j];
        }
    }

    return updated_hessian_inverse;
}
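
/* For reference, the standard BFGS inverse update is, with step s,
 * gradient difference y, and rho = 1 / (y^T s):
 *
 *   H_{k+1} = (I - rho * s * y^T) * H_k * (I - rho * y * s^T)
 *             + rho * s * s^T
 *
 * A minimal element-wise sketch of that formula (illustrative, assuming
 * H_k is symmetric, s and y have matching sizes, y^T s > 0, and Hy
 * denotes the product H_k * y):
 *
 *   double rho = 1.0 / dot_product(y, s);
 *   double yHy = dot_product(y, Hy);
 *   H_next[i][j] = H[i][j]
 *       - rho * (s[i] * Hy[j] + Hy[i] * s[j])
 *       + (rho * rho * yHy + rho) * s[i] * s[j];
 */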

double gpmp::optim::QuasiNewton::dot_product(const std::vector<double> &a,
                                             const std::vector<double> &b) {
    // Ensure vectors have the same size
    if (a.size() != b.size()) {
        throw std::invalid_argument(
            "Vectors must have the same size for dot product.");
    }

    double result = 0.0;
    for (size_t i = 0; i < a.size(); ++i) {
        result += a[i] * b[i];
    }
    return result;
}

std::tuple<std::vector<double>, double>
gpmp::optim::QuasiNewton::lbfgs_optimize(
    const std::function<double(const std::vector<double> &)> &f,
    const std::vector<double> &initial_point,
    double tolerance,
    size_t max_iterations,
    size_t memory_size) {

    const double eps = 1e-8;

    size_t n = initial_point.size();
    std::vector<double> x = initial_point;
    // s vectors: differences between successive iterates
    std::vector<std::vector<double>> s(memory_size, std::vector<double>(n));
    // y vectors: differences between successive gradients
    std::vector<std::vector<double>> y(memory_size, std::vector<double>(n));
    // rho values: 1 / (y . s) per stored correction pair
    std::vector<double> rho(memory_size);

    // Evaluate the objective function and its gradient at initial_point
    double fx = f(x);
    std::vector<double> g = calculate_gradient(f, x, 1e-6);

    for (size_t iter = 0; iter < max_iterations; ++iter) {
        // Check for convergence on the gradient norm
        double norm_grad = 0.0;
        for (size_t i = 0; i < n; ++i) {
            norm_grad += g[i] * g[i];
        }
        norm_grad = std::sqrt(norm_grad);
        if (norm_grad < tolerance) {
            break;
        }

        // Compute the search direction via the L-BFGS two-loop recursion,
        // seeded with the steepest-descent direction -g
        std::vector<double> d(n);
        for (size_t j = 0; j < n; ++j) {
            d[j] = -g[j];
        }

        // Number of stored correction pairs available so far. Note: pairs
        // are traversed in storage order, which matches recency order only
        // until the circular buffer wraps
        size_t history = std::min(iter, memory_size);
        std::vector<double> alpha(history);

        // First loop: newest to oldest correction pairs
        for (size_t k = history; k > 0; --k) {
            size_t i = k - 1;
            rho[i] = 1.0 / std::inner_product(s[i].begin(),
                                              s[i].end(),
                                              y[i].begin(),
                                              0.0);
            alpha[i] = rho[i] * std::inner_product(s[i].begin(),
                                                   s[i].end(),
                                                   d.begin(),
                                                   0.0);
            for (size_t j = 0; j < n; ++j) {
                d[j] -= alpha[i] * y[i][j];
            }
        }

        // Second loop: oldest to newest (identity initial Hessian, so no
        // extra scaling is applied between the two loops)
        for (size_t i = 0; i < history; ++i) {
            double beta = rho[i] * std::inner_product(y[i].begin(),
                                                      y[i].end(),
                                                      d.begin(),
                                                      0.0);
            for (size_t j = 0; j < n; ++j) {
                d[j] += (alpha[i] - beta) * s[i][j];
            }
        }

        // Directional derivative of f along d; require a descent direction
        double dg = std::inner_product(d.begin(), d.end(), g.begin(), 0.0);
        if (dg > 0) {
            break;
        }

        // Trial point with a fixed unit step and a sufficient-decrease
        // test; a rejected trial leaves the state unchanged
        double step_size = 1.0;
        std::vector<double> x_new = x;
        for (size_t i = 0; i < n; ++i) {
            x_new[i] += step_size * d[i];
        }

        double fx_new = f(x_new);
        if (fx_new < fx + eps * step_size * dg) {
            // Evaluate the gradient at the accepted point
            std::vector<double> g_new = calculate_gradient(f, x_new, 1e-6);

            // Record the correction pair before overwriting x and g
            for (size_t i = 0; i < n; ++i) {
                s[iter % memory_size][i] = x_new[i] - x[i];
                y[iter % memory_size][i] = g_new[i] - g[i];
            }

            x = x_new;
            fx = fx_new;
            g = g_new;
        }
    }

    return std::make_tuple(x, fx);
}
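
/* Example usage (an illustrative sketch, not part of the library build;
 * assumes these members are declared public in quasi.hpp):
 *
 *   gpmp::optim::QuasiNewton qn;
 *   auto sphere = [](const std::vector<double> &v) {
 *       double sum = 0.0;
 *       for (double vi : v) {
 *           sum += vi * vi;
 *       }
 *       return sum;
 *   };
 *   std::vector<double> x0 = {4.0, -3.0};
 *   std::tuple<std::vector<double>, double> result =
 *       qn.lbfgs_optimize(sphere, x0, 1e-6, 1000, 10);
 *   // std::get<0>(result) should approach {0.0, 0.0};
 *   // std::get<1>(result) should be near 0.0
 */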