/*************************************************************************
 *
 * Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *   ___  _ __   ___ _ __| |  __| |__) | \  / | |__) |
 *  / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 * | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *  \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *       | |
 *       |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 *
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <cmath>
#include <iostream>
#include <numeric>
#include <openGPMP/ml/svc.hpp>

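// Construct a linear SVC. C_ scales the L2 penalty term, l_rate is the
// gradient step size, max_iters caps the training iterations, and tol is
// the loss threshold used to stop early.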
gpmp::ml::SVC::SVC(double C_, double l_rate, int max_iters, double tol)
    : C(C_), learning_rate(l_rate), max_iterations(max_iters), tolerance(tol) {
}

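// Train the classifier on X_train/y_train (labels in {-1, +1}) by repeatedly
// applying subgradient updates to the hinge loss with an L2 penalty, stopping
// early once the regularized loss drops below `tolerance`.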
void gpmp::ml::SVC::fit(const std::vector<std::vector<double>> &X_train,
                        const std::vector<int> &y_train) {
    // Initialize weights and bias
    weights.resize(X_train[0].size(), 0.0);
    bias = 0.0;

    // Stochastic Gradient Descent
    for (int iter = 0; iter < max_iterations; ++iter) {
        update_weights(X_train, y_train);

        // Check convergence
        double loss = compute_loss(X_train, y_train);
        if (loss < tolerance) {
            break;
        }
    }
}

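// Predict a class label for each row of X_test from the sign of the linear
// decision function w.x + b: +1 for non-negative scores, -1 otherwise.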
std::vector<int>
gpmp::ml::SVC::predict(const std::vector<std::vector<double>> &X_test) {
    std::vector<int> predictions;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        int prediction = (score >= 0) ? 1 : -1;
        predictions.push_back(prediction);
    }
    return predictions;
}

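// Hinge loss for a single sample: max(0, 1 - y * f(x)), where f(x) is the
// raw decision value and y is the {-1, +1} label.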
double gpmp::ml::SVC::hinge_loss(double prediction, int label) {
    return std::fmax(0.0, 1 - label * prediction);
}

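// Mean objective over the dataset: hinge loss per sample plus the
// 0.5 * C * ||w||^2 regularization term, divided by the number of samples.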
double gpmp::ml::SVC::compute_loss(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    double loss = 0.0;
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        loss += hinge_loss(prediction, y[i]);
    }
    // Add L2 regularization
    for (double weight : weights) {
        loss += 0.5 * C * weight * weight;
    }
    return loss / X.size();
}

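// One pass of per-sample (stochastic) subgradient descent: samples that
// violate the margin contribute -y * x to the weight gradient and -y to the
// bias gradient, alongside the C * w regularization term.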
void gpmp::ml::SVC::update_weights(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        // The hinge loss is active when the functional margin y * f(x) < 1
        if (y[i] * prediction < 1) {
            // Update weights: L2 regularization term plus hinge subgradient
            for (size_t j = 0; j < X[i].size(); ++j) {
                weights[j] -= learning_rate * (C * weights[j] - y[i] * X[i][j]);
            }
            // Update bias: subgradient of the hinge term w.r.t. b is -y
            bias += learning_rate * y[i];
        }
    }
}

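// Map each decision value through a sigmoid to obtain a score in (0, 1).
// Note these are uncalibrated confidence scores, not fitted probabilities.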
std::vector<double>
gpmp::ml::SVC::predict_proba(const std::vector<std::vector<double>> &X_test) {
    std::vector<double> probabilities;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        double prob = sigmoid(score);
        probabilities.push_back(prob);
    }
    return probabilities;
}

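// Mean accuracy of the classifier on X_test against the true labels y_test.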
double gpmp::ml::SVC::score(const std::vector<std::vector<double>> &X_test,
                            const std::vector<int> &y_test) {
    std::vector<int> predictions = predict(X_test);
    return accuracy(predictions, y_test);
}

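// Simple setters for model configuration.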
void gpmp::ml::SVC::set_kernel(const std::string &k_type) {
    this->kernel_type = k_type;
}

void gpmp::ml::SVC::set_kernel_parameters(double k_param) {
    this->kernel_param = k_param;
}

void gpmp::ml::SVC::set_random_state(int seed) {
    this->random_state = seed;
}

void gpmp::ml::SVC::set_verbose(bool vbose) {
    this->verbose = vbose;
}

void gpmp::ml::SVC::set_penalty(const std::string &p_type) {
    this->penalty_type = p_type;
}

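// k-fold cross-validation: split the data into `cv` contiguous folds, train
// on all but one fold, score on the held-out fold, and return the mean
// accuracy across folds. The data is not shuffled before splitting.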
double gpmp::ml::SVC::cross_val_score(const std::vector<std::vector<double>> &X,
                                      const std::vector<int> &y,
                                      int cv) {
    std::vector<int> fold_sizes = k_fold_indices(X.size(), cv);
    double avg_score = 0.0;
    for (int i = 0; i < cv; ++i) {
        std::vector<std::vector<double>> X_train, X_valid;
        std::vector<int> y_train, y_valid;
        int start = 0;
        for (int j = 0; j < cv; ++j) {
            if (j != i) {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_train.push_back(X[k]);
                    y_train.push_back(y[k]);
                }
            } else {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_valid.push_back(X[k]);
                    y_valid.push_back(y[k]);
                }
            }
            start += fold_sizes[j];
        }
        fit(X_train, y_train);
        double score_val = score(X_valid, y_valid);
        if (verbose) {
            std::cout << "Cross-validation fold " << i + 1
                      << " accuracy: " << score_val << std::endl;
        }
        avg_score += score_val;
    }
    return avg_score / cv;
}

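// Exhaustive search over candidate C values and kernel parameters, scoring
// each pair with cross-validation and returning {C, kernel_param} for the
// best-scoring combination.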
std::vector<double>
gpmp::ml::SVC::grid_search(const std::vector<std::vector<double>> &X,
                           const std::vector<int> &y,
                           const std::vector<double> &C_values,
                           const std::vector<double> &kernel_params,
                           int cv) {
    std::vector<double> best_params;
    double best_score = 0.0;
    for (double val : C_values) {
        for (double param : kernel_params) {
            C = val; // apply the candidate regularization strength
            set_kernel_parameters(param);
            set_penalty("l2");  // Default penalty type
            set_verbose(false); // Suppress verbose output
            double score = cross_val_score(X, y, cv);
            if (score > best_score) {
                best_score = score;
                best_params = {val, param};
            }
        }
    }
    return best_params;
}

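// Kernel evaluation between two samples; only the linear kernel (dot product)
// is currently implemented, and unknown kernel types fall back to it.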
double gpmp::ml::SVC::kernel(const std::vector<double> &x1,
                             const std::vector<double> &x2) {
    if (kernel_type == "linear") {
        return dot_product(x1, x2);
    } else {
        // Default to linear kernel if unknown kernel type
        return dot_product(x1, x2);
    }
}

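// Dot product of two equally sized feature vectors.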
double gpmp::ml::SVC::dot_product(const std::vector<double> &x1,
                                  const std::vector<double> &x2) {
    double result = 0.0;
    for (size_t i = 0; i < x1.size(); ++i) {
        result += x1[i] * x2[i];
    }
    return result;
}

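// Logistic sigmoid 1 / (1 + e^(-z)).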
double gpmp::ml::SVC::sigmoid(double z) {
    return 1.0 / (1.0 + std::exp(-z));
}

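// Compute the size of each of the k folds, distributing any remainder one
// extra sample at a time to the first folds.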
std::vector<int> gpmp::ml::SVC::k_fold_indices(int num_instances, int k) {
    std::vector<int> fold_sizes(k, num_instances / k);
    int remainder = num_instances % k;
    for (int i = 0; i < remainder; ++i) {
        fold_sizes[i]++;
    }
    return fold_sizes;
}

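// Fraction of predictions that match the ground-truth labels.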
double gpmp::ml::SVC::accuracy(const std::vector<int> &predictions,
                               const std::vector<int> &labels) {
    int correct = 0;
    for (size_t i = 0; i < predictions.size(); ++i) {
        if (predictions[i] == labels[i]) {
            correct++;
        }
    }
    return static_cast<double>(correct) / predictions.size();
}