openGPMP
Open Source Mathematics Package
knn.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 #include <algorithm>
34 #include <cmath>
35 #include <openGPMP/ml/knn.hpp>
36 #include <stdexcept>
37 #include <unordered_map>
38 
40 }
41 
43 }
44 
46  const std::vector<std::vector<double>> &_training_data,
47  const std::vector<int> &_labels) {
48  if (_training_data.size() != _labels.size() || _training_data.empty()) {
49  throw std::invalid_argument("Invalid training data or labels.");
50  }
51 
52  this->training_data = _training_data;
53  this->labels = _labels;
54 }
55 
56 int gpmp::ml::KNN::predict(const std::vector<double> &input_vector, int k) {
57  if (training_data.empty() || labels.empty()) {
58  throw std::logic_error(
59  "Model not trained. Call train() before predict.");
60  }
61 
62  if (input_vector.size() != training_data[0].size()) {
63  throw std::invalid_argument("Invalid input vector size.");
64  }
65 
66  // if (k <= 0 || k > training_data.size()) {
67  if (k <= 0 || static_cast<size_t>(k) > training_data.size()) {
68  throw std::invalid_argument("Invalid value of k.");
69  }
70 
71  // Calculate distances and store index-label pairs
72  std::vector<std::pair<double, int>> distances;
73  for (size_t i = 0; i < training_data.size(); ++i) {
74  double distance =
75  calculateEuclideanDistance(input_vector, training_data[i]);
76  distances.emplace_back(distance, labels[i]);
77  }
78 
79  // Sort distances in ascending order
80  std::sort(
81  distances.begin(),
82  distances.end(),
83  [](const std::pair<double, int> &a, const std::pair<double, int> &b) {
84  return a.first < b.first;
85  });
86 
87  // Count votes for each label among the k nearest neighbors
88  std::unordered_map<int, int> label_counts;
89  for (int i = 0; i < k; ++i) {
90  label_counts[distances[i].second]++;
91  }
92 
93  // Find the label with the maximum votes
94  int max_votes = -1;
95  int predicted_label = -1;
96  for (const auto &entry : label_counts) {
97  if (entry.second > max_votes) {
98  max_votes = entry.second;
99  predicted_label = entry.first;
100  }
101  }
102 
103  return predicted_label;
104 }
105 
106 double
107 gpmp::ml::KNN::calculateEuclideanDistance(const std::vector<double> &vec1,
108  const std::vector<double> &vec2) {
109  double distance = 0.0;
110  for (size_t i = 0; i < vec1.size(); ++i) {
111  double diff = vec1[i] - vec2[i];
112  distance += diff * diff;
113  }
114  return std::sqrt(distance);
115 }
int predict(const std::vector< double > &input_vector, int k)
Predicts the label of a given input vector using KNN algorithm.
Definition: knn.cpp:56
double calculateEuclideanDistance(const std::vector< double > &vec1, const std::vector< double > &vec2)
Calculates the Euclidean distance between two vectors.
Definition: knn.cpp:107
void train(const std::vector< std::vector< double >> &training_data, const std::vector< int > &labels)
Trains the KNN model with the given training data and labels.
Definition: knn.cpp:45
~KNN()
Destructor for the KNN class.
Definition: knn.cpp:42
KNN()
Constructor for the KNN class.
Definition: knn.cpp:39
K-Nearest Neighbors Clustering.