Line data Source code
1 : /************************************************************************* 2 : * 3 : * Project 4 : * _____ _____ __ __ _____ 5 : * / ____| __ \| \/ | __ \ 6 : * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) | 7 : * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/ 8 : *| (_) | |_) | __/ | | | |__| | | | | | | | 9 : * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_| 10 : * | | 11 : * |_| 12 : * 13 : * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al. 14 : * 15 : * This software is licensed as described in the file LICENSE, which 16 : * you should have received as part of this distribution. The terms 17 : * among other details are referenced in the official documentation 18 : * seen here : https://akielaries.github.io/openGPMP/ along with 19 : * important files seen in this project. 20 : * 21 : * You may opt to use, copy, modify, merge, publish, distribute 22 : * and/or sell copies of the Software, and permit persons to whom 23 : * the Software is furnished to do so, under the terms of the 24 : * LICENSE file. As this is an Open Source effort, all implementations 25 : * must be of the same methodology. 26 : * 27 : * 28 : * 29 : * This software is distributed on an AS IS basis, WITHOUT 30 : * WARRANTY OF ANY KIND, either express or implied. 31 : * 32 : ************************************************************************/ 33 : #include <algorithm> 34 : #include <cmath> 35 : #include <openGPMP/ml/knn.hpp> 36 : #include <stdexcept> 37 : #include <unordered_map> 38 : 39 0 : gpmp::ml::KNN::KNN() { 40 0 : } 41 : 42 0 : gpmp::ml::KNN::~KNN() { 43 0 : } 44 : 45 0 : void gpmp::ml::KNN::train( 46 : const std::vector<std::vector<double>> &_training_data, 47 : const std::vector<int> &_labels) { 48 0 : if (_training_data.size() != _labels.size() || _training_data.empty()) { 49 0 : throw std::invalid_argument("Invalid training data or labels."); 50 : } 51 : 52 0 : this->training_data = _training_data; 53 0 : this->labels = _labels; 54 0 : } 55 : 56 0 : int gpmp::ml::KNN::predict(const std::vector<double> &input_vector, int k) { 57 0 : if (training_data.empty() || labels.empty()) { 58 0 : throw std::logic_error( 59 0 : "Model not trained. Call train() before predict."); 60 : } 61 : 62 0 : if (input_vector.size() != training_data[0].size()) { 63 0 : throw std::invalid_argument("Invalid input vector size."); 64 : } 65 : 66 : // if (k <= 0 || k > training_data.size()) { 67 0 : if (k <= 0 || static_cast<size_t>(k) > training_data.size()) { 68 0 : throw std::invalid_argument("Invalid value of k."); 69 : } 70 : 71 : // Calculate distances and store index-label pairs 72 0 : std::vector<std::pair<double, int>> distances; 73 0 : for (size_t i = 0; i < training_data.size(); ++i) { 74 : double distance = 75 0 : calculateEuclideanDistance(input_vector, training_data[i]); 76 0 : distances.emplace_back(distance, labels[i]); 77 : } 78 : 79 : // Sort distances in ascending order 80 0 : std::sort( 81 : distances.begin(), 82 : distances.end(), 83 0 : [](const std::pair<double, int> &a, const std::pair<double, int> &b) { 84 0 : return a.first < b.first; 85 : }); 86 : 87 : // Count votes for each label among the k nearest neighbors 88 0 : std::unordered_map<int, int> label_counts; 89 0 : for (int i = 0; i < k; ++i) { 90 0 : label_counts[distances[i].second]++; 91 : } 92 : 93 : // Find the label with the maximum votes 94 0 : int max_votes = -1; 95 0 : int predicted_label = -1; 96 0 : for (const auto &entry : label_counts) { 97 0 : if (entry.second > max_votes) { 98 0 : max_votes = entry.second; 99 0 : predicted_label = entry.first; 100 : } 101 : } 102 : 103 0 : return predicted_label; 104 0 : } 105 : 106 : double 107 0 : gpmp::ml::KNN::calculateEuclideanDistance(const std::vector<double> &vec1, 108 : const std::vector<double> &vec2) { 109 0 : double distance = 0.0; 110 0 : for (size_t i = 0; i < vec1.size(); ++i) { 111 0 : double diff = vec1[i] - vec2[i]; 112 0 : distance += diff * diff; 113 : } 114 0 : return std::sqrt(distance); 115 : }