41 const std::vector<double> &a,
42 const std::vector<double> &b)
const {
43 if (a.size() != b.size()) {
44 throw std::invalid_argument(
45 "Error: Vector dimensions do not match for subtraction.");
48 std::vector<double> result;
49 result.reserve(a.size());
51 for (
size_t i = 0; i < a.size(); ++i) {
52 result.push_back(a[i] - b[i]);
59 const std::function<
double(
const std::vector<double> &)> &func,
60 const std::vector<double> &initial_point,
62 size_t max_iterations) {
63 std::vector<double> current_point = initial_point;
64 size_t n = initial_point.size();
66 for (
size_t iteration = 0; iteration < max_iterations; ++iteration) {
68 std::vector<double> gradient =
69 calculate_gradient(func, current_point, 1e-6);
72 double gradient_norm = 0.0;
73 for (
size_t i = 0; i < n; ++i) {
74 gradient_norm += gradient[i] * gradient[i];
76 gradient_norm = std::sqrt(gradient_norm);
78 if (gradient_norm < tolerance) {
79 std::cout <<
"Converged after " << iteration <<
" iterations."
85 std::vector<std::vector<double>> bhhh_matrix =
86 calculate_bhhh_matrix(gradient);
89 current_point = update_point(current_point, gradient, bhhh_matrix);
92 std::cout <<
"Reached maximum iterations without convergence." << std::endl;
97 const std::function<
double(
const std::vector<double> &)> &func,
98 const std::vector<double> &point,
100 size_t n = point.size();
101 std::vector<double> gradient(n);
103 for (
size_t i = 0; i < n; ++i) {
104 std::vector<double> perturbed_point = point;
105 perturbed_point[i] += epsilon;
107 double perturbed_value = func(perturbed_point);
108 double original_value = func(point);
110 gradient[i] = (perturbed_value - original_value) / epsilon;
116 std::vector<std::vector<double>>
118 const std::vector<double> &gradient) {
119 size_t n = gradient.size();
120 std::vector<std::vector<double>> bhhh_matrix(n, std::vector<double>(n));
122 for (
size_t i = 0; i < n; ++i) {
123 for (
size_t j = 0; j < n; ++j) {
124 bhhh_matrix[i][j] = gradient[i] * gradient[j];
132 const std::vector<double> ¤t_point,
133 const std::vector<double> &gradient,
134 const std::vector<std::vector<double>> &bhhh_matrix) {
135 size_t n = current_point.size();
136 std::vector<double> updated_point(n);
138 for (
size_t i = 0; i < n; ++i) {
139 updated_point[i] = current_point[i] - gradient[i] / bhhh_matrix[i][i];
142 return updated_point;
146 const std::function<
double(
const std::vector<double> &)> &func,
147 const std::vector<double> &initial_point,
149 size_t max_iterations) {
150 std::vector<double> current_point = initial_point;
151 size_t n = initial_point.size();
154 std::vector<std::vector<double>> hessian_inverse(
156 std::vector<double>(n, 0.0));
157 for (
size_t i = 0; i < n; ++i) {
158 hessian_inverse[i][i] = 1.0;
161 for (
size_t iteration = 0; iteration < max_iterations; ++iteration) {
163 std::vector<double> gradient =
164 calculate_gradient(func, current_point, 1e-6);
167 double gradient_norm = 0.0;
168 for (
size_t i = 0; i < n; ++i) {
169 gradient_norm += gradient[i] * gradient[i];
171 gradient_norm = std::sqrt(gradient_norm);
173 if (gradient_norm < tolerance) {
174 std::cout <<
"Converged after " << iteration <<
" iterations."
176 return current_point;
180 std::vector<double> search_direction =
181 calculate_search_direction(gradient, hessian_inverse);
184 double step_size = line_search(func, current_point, search_direction);
187 std::vector<double> next_point =
188 update_point(current_point, search_direction, step_size);
191 std::vector<double> gradient_difference =
192 calculate_gradient_difference(next_point, current_point, gradient);
193 hessian_inverse = update_hessian_inverse(hessian_inverse,
198 current_point = next_point;
201 std::cout <<
"Reached maximum iterations without convergence." << std::endl;
202 return current_point;
206 const std::vector<double> &gradient,
207 const std::vector<std::vector<double>> &hessian_inverse) {
208 size_t n = gradient.size();
209 std::vector<double> search_direction(n);
211 for (
size_t i = 0; i < n; ++i) {
212 search_direction[i] = 0.0;
213 for (
size_t j = 0; j < n; ++j) {
214 search_direction[i] -= hessian_inverse[i][j] * gradient[j];
218 return search_direction;
222 const std::function<
double(
const std::vector<double> &)> &func,
223 const std::vector<double> ¤t_point,
224 const std::vector<double> &search_direction) {
225 const double alpha = 0.001;
226 const double beta = 0.5;
227 const int maxIterations = 100;
228 const double minStepSize = 1e-6;
230 double step_size = 1.0;
231 std::vector<double> updated_point = current_point;
234 double f_current = func(current_point);
237 double directional_derivative =
238 dot_product(calculate_gradient(func, current_point, 1e-6),
242 while (step_size > minStepSize && iteration < maxIterations) {
244 update_point(current_point, search_direction, step_size);
245 double f_updated = func(updated_point);
247 f_current + alpha * step_size * directional_derivative) {
258 const std::vector<double> ¤t_point,
259 const std::vector<double> &search_direction,
261 size_t n = current_point.size();
262 std::vector<double> updated_point(n);
264 for (
size_t i = 0; i < n; ++i) {
265 updated_point[i] = current_point[i] + step_size * search_direction[i];
268 return updated_point;
272 const std::vector<double> &next_point,
273 const std::vector<double> ¤t_point,
274 const std::vector<double> &gradient) {
275 size_t n = next_point.size();
276 std::vector<double> gradient_difference(n);
278 for (
size_t i = 0; i < n; ++i) {
279 gradient_difference[i] =
280 gradient[i] * (next_point[i] - current_point[i]);
283 return gradient_difference;
286 std::vector<std::vector<double>>
288 const std::vector<std::vector<double>> &hessian_inverse,
289 const std::vector<double> &gradient_difference,
290 const std::vector<double> &search_direction) {
292 size_t n = hessian_inverse.size();
293 std::vector<std::vector<double>> updated_hessian_inverse(
295 std::vector<double>(n));
298 double rho =
dot_product(gradient_difference, search_direction);
300 for (
size_t i = 0; i < n; ++i) {
301 for (
size_t j = 0; j < n; ++j) {
302 updated_hessian_inverse[i][j] =
303 hessian_inverse[i][j] +
304 rho * gradient_difference[i] * gradient_difference[j];
308 return updated_hessian_inverse;
312 const std::vector<double> &b) {
314 if (a.size() != b.size()) {
315 throw std::invalid_argument(
316 "Vectors must have the same size for dot product.");
320 for (
size_t i = 0; i < a.size(); ++i) {
321 result += a[i] * b[i];
326 std::tuple<std::vector<double>,
double>
328 const std::function<
double(
const std::vector<double> &)> &f,
329 const std::vector<double> &initial_point,
331 size_t max_iterations,
332 size_t memory_size) {
334 const double eps = 1e-8;
336 size_t n = initial_point.size();
337 std::vector<double> x = initial_point;
338 std::vector<double> g(n);
339 std::vector<std::vector<double>> s(memory_size,
340 std::vector<double>(n));
341 std::vector<std::vector<double>> y(memory_size,
342 std::vector<double>(n));
343 std::vector<double> rho(memory_size);
351 for (
size_t iter = 0; iter < max_iterations; ++iter) {
353 double norm_grad = 0.0;
354 for (
size_t i = 0; i < n; ++i) {
355 norm_grad += g[i] * g[i];
357 norm_grad = sqrt(norm_grad);
358 if (norm_grad < tolerance) {
363 std::vector<double> d = g;
366 size_t start = std::min(iter, memory_size);
368 for (
size_t i = start; i > 0; --i) {
371 inner_product(s[i].begin(), s[i].end(), y[i].begin(), 0.0);
374 inner_product(s[i].begin(), s[i].end(), d.begin(), 0.0);
375 for (
size_t j = 0; j < n; ++j) {
376 d[j] -= alpha * y[i][j];
381 for (
size_t i = 0; i < n; ++i) {
388 double dg = inner_product(d.begin(), d.end(), g.begin(), 0.0);
396 double step_size = 1.0;
397 std::vector<double> x_new = x;
398 for (
size_t i = 0; i < n; ++i) {
399 x_new[i] += step_size * d[i];
402 double fx_new = f(x_new);
403 if (fx_new < fx + eps * step_size * dg) {
413 for (
size_t i = 0; i < n; ++i) {
414 s[iter % memory_size][i] = x_new[i] - x[i];
415 y[iter % memory_size][i] = g[i] - d[i];
420 return std::make_tuple(x, fx);
double line_search(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > ¤t_point, const std::vector< double > &search_direction)
Perform line search to find an appropriate step size.
std::vector< double > bhhh_optimize(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
Optimize a function using the Berndt–Hall–Hall–Hausman (BHHH) algorithm.
std::vector< double > calculate_search_direction(const std::vector< double > &gradient, const std::vector< std::vector< double >> &hessian_inverse)
Calculate the search direction using the BFGS method.
std::vector< std::vector< double > > calculate_bhhh_matrix(const std::vector< double > &gradient)
Calculate the BHHH matrix from the gradient.
std::vector< double > calculate_gradient_difference(const std::vector< double > &next_point, const std::vector< double > ¤t_point, const std::vector< double > &gradient)
Calculate the gradient difference between two points.
double dot_product(const std::vector< double > &a, const std::vector< double > &b)
Calculate the dot product of two vectors.
std::vector< double > calculate_gradient(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &point, double epsilon)
Calculate the gradient of a function at a given point.
std::vector< double > update_point(const std::vector< double > ¤t_point, const std::vector< double > &gradient, const std::vector< std::vector< double >> &bhhh_matrix)
Update the current point using the BHHH matrix.
std::tuple< std::vector< double >, double > lbfgs_optimize(const std::function< double(const std::vector< double > &)> &f, const std::vector< double > &initial_point, double tolerance=1e-4, size_t max_iterations=100, size_t memory_size=5)
Optimize a function using the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm.
std::vector< double > bfgs_optimize(const std::function< double(const std::vector< double > &)> &func, const std::vector< double > &initial_point, double tolerance, size_t max_iterations)
Optimize a function using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
std::vector< std::vector< double > > update_hessian_inverse(const std::vector< std::vector< double >> &hessian_inverse, const std::vector< double > &gradient_difference, const std::vector< double > &search_direction)
Update the inverse of the Hessian matrix using the BFGS method.
std::vector< double > vector_subtraction(const std::vector< double > &a, const std::vector< double > &b) const
Subtract two vectors element-wise.
int dot_product(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2)
Computes the dot product for vectors of signed 8-bit integers.