/*************************************************************************
 *
 *  Project
 *                           _____ _____  __  __ _____
 *                          / ____|  __ \|  \/  |  __ \
 *    ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
 *   / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *  | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *   \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *        | |
 *        |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here: https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <openGPMP/ml/trainers.hpp>
#include <random>

// Batch gradient descent for a least-squares linear model: every update
// uses the gradient averaged over the full training set
std::vector<double>
gpmp::ml::Trainers::gradientdesc(const std::vector<std::vector<double>> &X,
                                 const std::vector<double> &y,
                                 double alpha,
                                 int num_iters) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    // Perform gradient descent
    for (int iter = 0; iter < num_iters; iter++) {
        std::vector<double> delta_theta(n, 0.0); // Accumulated gradient

        // Compute predictions and errors over all training examples
        for (int i = 0; i < m; i++) {
            double prediction = 0;
            for (int j = 0; j < n; j++) {
                prediction += theta[j] * X[i][j];
            }
            double error = prediction - y[i];

            // Accumulate this example's contribution to the gradient
            for (int j = 0; j < n; j++) {
                delta_theta[j] += error * X[i][j];
            }
        }

        // Update parameters with the averaged gradient
        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha / m) * delta_theta[j];
        }
    }

    return theta;
}

// Stochastic gradient descent: each iteration updates the parameters from
// a single uniformly sampled training example
std::vector<double> gpmp::ml::Trainers::stoch_gradientdesc(
    const std::vector<std::vector<double>> &X,
    const std::vector<double> &y,
    double alpha,
    int num_iters) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    std::default_random_engine generator;
    std::uniform_int_distribution<int> distribution(0, m - 1);

    for (int iter = 0; iter < num_iters; iter++) {
        // Choose a random training example
        int random_index = distribution(generator);
        double prediction = 0;

        for (int j = 0; j < n; j++) {
            prediction += theta[j] * X[random_index][j];
        }

        double error = prediction - y[random_index];

        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha * error * X[random_index][j]);
        }
    }

    return theta;
}
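
// Mini-batch gradient descent: each iteration draws batch_size example
// indices uniformly at random (with replacement, so an example may repeat
// within a batch) and applies the averaged least-squares update
//   theta[j] -= (alpha / batch_size) * sum over the batch of error_i * X[i][j]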
std::vector<double> gpmp::ml::Trainers::minibatch_gradientdesc(
    const std::vector<std::vector<double>> &X,
    const std::vector<double> &y,
    double alpha,
    int num_iters,
    int batch_size) {
    int m = X.size();                  // Number of training examples
    int n = X[0].size();               // Number of features
    std::vector<double> theta(n, 0.0); // Initialize parameters

    std::default_random_engine generator;
    std::uniform_int_distribution<int> distribution(0, m - 1);

    for (int iter = 0; iter < num_iters; iter++) {
        // Select random batch indices
        std::vector<int> batch_indices;
        for (int i = 0; i < batch_size; i++) {
            batch_indices.push_back(distribution(generator));
        }

        // Compute the gradient for the batch
        std::vector<double> delta_theta(n, 0.0);
        for (int idx : batch_indices) {
            double prediction = 0;
            for (int j = 0; j < n; j++) {
                prediction += theta[j] * X[idx][j];
            }
            double error = prediction - y[idx];

            for (int j = 0; j < n; j++) {
                delta_theta[j] += (error * X[idx][j]);
            }
        }

        // Update parameters using the averaged gradient of the batch
        for (int j = 0; j < n; j++) {
            theta[j] -= (alpha / batch_size) * delta_theta[j];
        }
    }

    return theta;
}
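
// ----------------------------------------------------------------------
// Usage sketch (illustrative, not part of the library): fits y = 2x on a
// toy dataset with each of the three trainers. The GPMP_TRAINERS_DEMO
// guard is an assumption so this demo never enters normal builds, and it
// assumes Trainers is default-constructible with the methods above as
// non-static members.
#ifdef GPMP_TRAINERS_DEMO
#include <cstdio>

int main() {
    // Design matrix with a bias column (1.0) and one feature
    std::vector<std::vector<double>> X = {{1.0, 1.0},
                                          {1.0, 2.0},
                                          {1.0, 3.0},
                                          {1.0, 4.0}};
    std::vector<double> y = {2.0, 4.0, 6.0, 8.0}; // y = 2 * x

    gpmp::ml::Trainers trainer;
    std::vector<double> t_batch = trainer.gradientdesc(X, y, 0.01, 10000);
    std::vector<double> t_stoch =
        trainer.stoch_gradientdesc(X, y, 0.01, 10000);
    std::vector<double> t_mini =
        trainer.minibatch_gradientdesc(X, y, 0.01, 10000, 2);

    // Each theta should approach [0, 2] (zero intercept, slope 2); the
    // stochastic variants hover near the solution with a constant alpha
    std::printf("batch:      [%f, %f]\n", t_batch[0], t_batch[1]);
    std::printf("stochastic: [%f, %f]\n", t_stoch[0], t_stoch[1]);
    std::printf("mini-batch: [%f, %f]\n", t_mini[0], t_mini[1]);
    return 0;
}
#endif // GPMP_TRAINERS_DEMO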