openGPMP
Open Source Mathematics Package
trainers.cpp
/*************************************************************************
 *
 *  Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
 * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
 * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *      | |
 *      |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <openGPMP/ml/trainers.hpp>
#include <random>

/**
 * @brief Perform gradient descent for linear regression
 */
std::vector<double>
gpmp::ml::Trainers::gradientdesc(const std::vector<std::vector<double>> &X,
                                 const std::vector<double> &y,
                                 double alpha,
                                 int num_iters) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    // Perform gradient descent
    for (int iter = 0; iter < num_iters; iter++) {
        std::vector<double> delta_theta(n, 0.0); // Change in parameters

        // Compute predictions and errors
        for (int i = 0; i < m; i++) {
            double prediction = 0;
            for (int j = 0; j < n; j++) {
                prediction += theta[j] * X[i][j];
            }
            double error = prediction - y[i];

            // Update delta_theta
            for (int j = 0; j < n; j++) {
                delta_theta[j] += error * X[i][j];
            }
        }

        // Update parameters by the averaged gradient step
        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha / m) * delta_theta[j];
        }
    }

    return theta;
}
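A minimal usage sketch for the batch trainer above. It assumes `Trainers` is default-constructible and that the caller supplies a leading 1.0 bias column in X (neither is shown in this file); for this convex problem, a small enough step size drives theta toward the least-squares solution.

// example: fitting y = 3 + 2x (illustrative sketch, not part of trainers.cpp)
#include <openGPMP/ml/trainers.hpp>
#include <iostream>
#include <vector>

int main() {
    // First column of X is a constant 1.0 bias feature
    std::vector<std::vector<double>> X = {
        {1.0, 0.0}, {1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}};
    std::vector<double> y = {3.0, 5.0, 7.0, 9.0};

    gpmp::ml::Trainers trainer; // assumed default-constructible
    std::vector<double> theta = trainer.gradientdesc(X, y, 0.1, 1000);

    // Expect theta close to {3.0, 2.0} for this noiseless data
    std::cout << theta[0] << ' ' << theta[1] << '\n';
    return 0;
}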

/**
 * @brief Perform stochastic gradient descent for linear regression
 */
std::vector<double> gpmp::ml::Trainers::stoch_gradientdesc(
    const std::vector<std::vector<double>> &X,
    const std::vector<double> &y,
    double alpha,
    int num_iters) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    std::default_random_engine generator;
    std::uniform_int_distribution<int> distribution(0, m - 1);

    for (int iter = 0; iter < num_iters; iter++) {
        int random_index =
            distribution(generator); // Choose a random training example
        double prediction = 0;

        for (int j = 0; j < n; j++) {
            prediction += theta[j] * X[random_index][j];
        }

        double error = prediction - y[random_index];

        // Update parameters using this single example's gradient
        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha * error * X[random_index][j]);
        }
    }

    return theta;
}
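Note that the `std::default_random_engine` above is default-seeded, so on a given standard-library implementation the sampled index sequence, and therefore the returned theta, is identical on every run. A sketch under the same assumptions as before; the per-example update applies alpha directly (no 1/m averaging), so a smaller step size than the batch version is usually appropriate.

// example: stochastic variant on the same toy data (illustrative sketch)
#include <openGPMP/ml/trainers.hpp>
#include <iostream>
#include <vector>

int main() {
    std::vector<std::vector<double>> X = {
        {1.0, 0.0}, {1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}};
    std::vector<double> y = {3.0, 5.0, 7.0, 9.0};

    gpmp::ml::Trainers trainer; // assumed default-constructible
    // alpha = 0.05: each update takes the full per-example gradient step
    std::vector<double> theta = trainer.stoch_gradientdesc(X, y, 0.05, 5000);

    // Reproducible across runs: the engine inside is default-seeded
    std::cout << theta[0] << ' ' << theta[1] << '\n';
    return 0;
}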

/**
 * @brief Perform mini-batch gradient descent for linear regression
 */
std::vector<double> gpmp::ml::Trainers::minibatch_gradientdesc(
    const std::vector<std::vector<double>> &X,
    const std::vector<double> &y,
    double alpha,
    int num_iters,
    int batch_size) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    std::default_random_engine generator;
    std::uniform_int_distribution<int> distribution(0, m - 1);

    for (int iter = 0; iter < num_iters; iter++) {
        // Select random batch indices (drawn with replacement)
        std::vector<int> batch_indices;
        for (int i = 0; i < batch_size; i++) {
            batch_indices.push_back(distribution(generator));
        }

        // Compute gradient for the batch
        std::vector<double> delta_theta(n, 0.0);
        for (int idx : batch_indices) {
            double prediction = 0;
            for (int j = 0; j < n; j++) {
                prediction += theta[j] * X[idx][j];
            }
            double error = prediction - y[idx];

            for (int j = 0; j < n; j++) {
                delta_theta[j] += (error * X[idx][j]);
            }
        }

        // Update parameters using the gradient of the batch
        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha / batch_size) * delta_theta[j];
        }
    }

    return theta;
}
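Finally, a sketch of the mini-batch variant under the same assumptions. Each iteration draws batch_size indices with replacement (duplicates within a batch are possible) and divides the accumulated gradient by batch_size, so the step magnitude stays comparable to the batch trainer regardless of batch size.

// example: mini-batch variant, batch_size = 2 (illustrative sketch)
#include <openGPMP/ml/trainers.hpp>
#include <iostream>
#include <vector>

int main() {
    std::vector<std::vector<double>> X = {
        {1.0, 0.0}, {1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}};
    std::vector<double> y = {3.0, 5.0, 7.0, 9.0};

    gpmp::ml::Trainers trainer; // assumed default-constructible
    // Each iteration averages the gradients of two randomly drawn examples
    std::vector<double> theta =
        trainer.minibatch_gradientdesc(X, y, 0.1, 2000, 2);

    std::cout << theta[0] << ' ' << theta[1] << '\n';
    return 0;
}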