Line data Source code
/*************************************************************************
 *
 *  Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
 * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
 * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *      | |
 *      |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 *
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
31 : * 32 : ************************************************************************/ 33 : #include <algorithm> 34 : #include <cmath> 35 : #include <openGPMP/ml/regularizers.hpp> 36 : #include <random> 37 : 38 : double 39 0 : gpmp::ml::Regularize::l1_regularization(const std::vector<double> &weights, 40 : double lambda) { 41 0 : double penalty = 0.0; 42 0 : for (double weight : weights) { 43 0 : penalty += std::abs(weight); 44 : } 45 0 : return lambda * penalty; 46 : } 47 : 48 : double 49 0 : gpmp::ml::Regularize::l2_regularization(const std::vector<double> &weights, 50 : double lambda) { 51 0 : double penalty = 0.0; 52 0 : for (double weight : weights) { 53 0 : penalty += weight * weight; 54 : } 55 0 : return 0.5 * lambda * penalty; 56 : } 57 : 58 0 : double gpmp::ml::Regularize::elastic_net_regularization( 59 : const std::vector<double> &weights, 60 : double lambda1, 61 : double lambda2) { 62 0 : double l1_penalty = l1_regularization(weights, lambda1); 63 0 : double l2_penalty = l2_regularization(weights, lambda2); 64 0 : return l1_penalty + l2_penalty; 65 : } 66 : 67 0 : double gpmp::ml::Regularize::dropout_regularization(double dropout_rate, 68 : int num_neurons) { 69 0 : return 0.5 * dropout_rate * num_neurons; 70 : } 71 : 72 0 : bool gpmp::ml::Regularize::early_stopping(double current_val_loss, 73 : double &best_val_loss, 74 : int patience, 75 : int epoch) { 76 0 : if (current_val_loss < best_val_loss) { 77 0 : best_val_loss = current_val_loss; 78 0 : patience = epoch + patience; // Reset patience 79 : } else { 80 0 : if (epoch >= patience) { 81 0 : return true; // Stop training 82 : } 83 : } 84 0 : return false; // Continue training 85 : } 86 : 87 0 : std::vector<double> gpmp::ml::Regularize::ensemble_predictions( 88 : const std::vector<std::vector<double>> &predictions) { 89 0 : std::vector<double> ensemble; 90 0 : if (!predictions.empty()) { 91 0 : ensemble.resize(predictions.front().size(), 0.0); 92 0 : for (const auto &prediction : predictions) { 93 0 : 
for (size_t i = 0; i < prediction.size(); ++i) { 94 0 : ensemble[i] += prediction[i]; 95 : } 96 : } 97 0 : for (auto &val : ensemble) { 98 0 : val /= predictions.size(); 99 : } 100 : } 101 0 : return ensemble; 102 0 : } 103 : 104 0 : void gpmp::ml::Regularize::max_norm_regularization(std::vector<double> &weights, 105 : double max_norm) { 106 0 : double norm = 0.0; 107 0 : for (double &weight : weights) { 108 0 : norm += weight * weight; 109 : } 110 0 : norm = sqrt(norm); 111 0 : if (norm > max_norm) { 112 0 : double factor = max_norm / norm; 113 0 : for (double &weight : weights) { 114 0 : weight *= factor; 115 : } 116 : } 117 0 : } 118 : 119 0 : void gpmp::ml::Regularize::weight_decay_regularization( 120 : std::vector<double> &weights, 121 : double lambda) { 122 0 : for (double &weight : weights) { 123 0 : weight *= (1.0 - lambda); 124 : } 125 0 : } 126 : 127 0 : std::vector<std::vector<double>> gpmp::ml::Regularize::batch_normalization( 128 : const std::vector<std::vector<double>> &input_data, 129 : double epsilon, 130 : double scale, 131 : double shift) { 132 0 : std::vector<std::vector<double>> normalized_data; 133 0 : for (const auto &instance : input_data) { 134 0 : double mean = 0.0; 135 0 : for (double val : instance) { 136 0 : mean += val; 137 : } 138 0 : mean /= instance.size(); 139 : 140 0 : double variance = 0.0; 141 0 : for (double val : instance) { 142 0 : variance += (val - mean) * (val - mean); 143 : } 144 0 : variance /= instance.size(); 145 : 146 0 : double std_dev = sqrt(variance + epsilon); 147 : 148 0 : std::vector<double> normalized_instance; 149 0 : for (double val : instance) { 150 0 : double normalized_val = scale * ((val - mean) / std_dev) + shift; 151 0 : normalized_instance.push_back(normalized_val); 152 : } 153 0 : normalized_data.push_back(normalized_instance); 154 0 : } 155 0 : return normalized_data; 156 0 : } 157 : 158 0 : std::vector<std::vector<double>> gpmp::ml::Regularize::data_augmentation( 159 : const 
std::vector<std::vector<double>> &input_data, 160 : int augmentation_factor) { 161 0 : std::vector<std::vector<double>> augmented_data; 162 0 : std::random_device rd; 163 0 : std::mt19937 gen(rd()); 164 : 165 0 : for (const auto &instance : input_data) { 166 0 : augmented_data.push_back(instance); 167 0 : for (int i = 1; i < augmentation_factor; ++i) { 168 0 : std::vector<double> augmented_instance = instance; 169 0 : std::shuffle(augmented_instance.begin(), 170 : augmented_instance.end(), 171 : gen); 172 0 : augmented_data.push_back(augmented_instance); 173 0 : } 174 : } 175 0 : return augmented_data; 176 0 : }