openGPMP
Open Source Mathematics Package
svc.cpp
/*************************************************************************
 *
 * Project
 * _____ _____ __ __ _____
 * / ____| __ \| \/ | __ \
 * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
 * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
 *| (_) | |_) | __/ | | | |__| | | | | | | |
 * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
 * | |
 * |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <cmath>
#include <iostream>
#include <numeric>
#include <openGPMP/ml/svc.hpp>

gpmp::ml::SVC::SVC(double C_, double l_rate, int max_iters, double tol)
    : C(C_), learning_rate(l_rate), max_iterations(max_iters), tolerance(tol) {
}

void gpmp::ml::SVC::fit(const std::vector<std::vector<double>> &X_train,
                        const std::vector<int> &y_train) {
    // Initialize weights and bias
    weights.resize(X_train[0].size(), 0.0);
    bias = 0.0;

    // Stochastic Gradient Descent
    for (int iter = 0; iter < max_iterations; ++iter) {
        update_weights(X_train, y_train);

        // Check convergence
        double loss = compute_loss(X_train, y_train);
        if (loss < tolerance) {
            break;
        }
    }
}

std::vector<int>
gpmp::ml::SVC::predict(const std::vector<std::vector<double>> &X_test) {
    std::vector<int> predictions;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        int prediction = (score >= 0) ? 1 : -1;
        predictions.push_back(prediction);
    }
    return predictions;
}

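// Hinge loss for a single sample: L(f, y) = max(0, 1 - y * f).
// For example, f = 0.3 with y = +1 gives a loss of 0.7, while
// f = 1.5 with y = +1 lies outside the margin and gives a loss of 0.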
double gpmp::ml::SVC::hinge_loss(double prediction, int label) {
    return std::fmax(0.0, 1 - label * prediction);
}

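// Average regularized objective over the data set, as implemented below:
//   J(w, b) = (1/N) * [ sum_i max(0, 1 - y_i * (w . x_i + b)) + 0.5 * C * ||w||^2 ]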
double gpmp::ml::SVC::compute_loss(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    double loss = 0.0;
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        loss += hinge_loss(prediction, y[i]);
    }
    // Add L2 regularization
    for (double weight : weights) {
        loss += 0.5 * C * weight * weight;
    }
    return loss / X.size();
}

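// Single SGD pass over the data. For each sample whose functional margin
// y_i * (w . x_i + b) falls below 1, take a subgradient step on the
// regularized hinge loss:
//   w <- w - lr * (C * w - y_i * x_i),   b <- b + lr * y_i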
void gpmp::ml::SVC::update_weights(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        // Hinge loss is active only when the margin constraint is violated
        if (y[i] * prediction < 1) {
            // Update weights
            for (size_t j = 0; j < X[i].size(); ++j) {
                weights[j] -= learning_rate * (C * weights[j] - y[i] * X[i][j]);
            }
            // Update bias
            bias += learning_rate * y[i];
        }
    }
}

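// Decision scores mapped through a sigmoid to (0, 1). These are raw
// logistic transforms of the margin, not calibrated probabilities.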
std::vector<double>
gpmp::ml::SVC::predict_proba(const std::vector<std::vector<double>> &X_test) {
    std::vector<double> probabilities;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        double prob = sigmoid(score);
        probabilities.push_back(prob);
    }
    return probabilities;
}

double gpmp::ml::SVC::score(const std::vector<std::vector<double>> &X_test,
                            const std::vector<int> &y_test) {
    std::vector<int> predictions = predict(X_test);
    return accuracy(predictions, y_test);
}

void gpmp::ml::SVC::set_kernel(const std::string &k_type) {
    this->kernel_type = k_type;
}

void gpmp::ml::SVC::set_kernel_parameters(double k_param) {
    this->kernel_param = k_param;
}

void gpmp::ml::SVC::set_random_state(int seed) {
    this->random_state = seed;
}

void gpmp::ml::SVC::set_verbose(bool vbose) {
    this->verbose = vbose;
}

void gpmp::ml::SVC::set_penalty(const std::string &p_type) {
    this->penalty_type = p_type;
}

double gpmp::ml::SVC::cross_val_score(const std::vector<std::vector<double>> &X,
                                      const std::vector<int> &y,
                                      int cv) {
    std::vector<int> fold_sizes = k_fold_indices(X.size(), cv);
    double avg_score = 0.0;
    for (int i = 0; i < cv; ++i) {
        std::vector<std::vector<double>> X_train, X_valid;
        std::vector<int> y_train, y_valid;
        int start = 0;
        for (int j = 0; j < cv; ++j) {
            if (j != i) {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_train.push_back(X[k]);
                    y_train.push_back(y[k]);
                }
            } else {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_valid.push_back(X[k]);
                    y_valid.push_back(y[k]);
                }
            }
            start += fold_sizes[j];
        }
        fit(X_train, y_train);
        double score_val = score(X_valid, y_valid);
        if (verbose) {
            std::cout << "Cross-validation fold " << i + 1
                      << " accuracy: " << score_val << std::endl;
        }
        avg_score += score_val;
    }
    return avg_score / cv;
}

std::vector<double>
gpmp::ml::SVC::grid_search(const std::vector<std::vector<double>> &X,
                           const std::vector<int> &y,
                           const std::vector<double> &C_values,
                           const std::vector<double> &kernel_params,
                           int cv) {
    std::vector<double> best_params;
    double best_score = 0.0;
    for (double val : C_values) {
        for (double param : kernel_params) {
            C = val; // Apply the candidate regularization strength
            set_kernel_parameters(param);
            set_penalty("l2");  // Default penalty type
            set_verbose(false); // Suppress verbose output
            double score = cross_val_score(X, y, cv);
            if (score > best_score) {
                best_score = score;
                best_params = {val, param};
            }
        }
    }
    return best_params;
}

double gpmp::ml::SVC::kernel(const std::vector<double> &x1,
                             const std::vector<double> &x2) {
    if (kernel_type == "linear") {
        return dot_product(x1, x2);
    } else {
        // Default to linear kernel if unknown kernel type
        return dot_product(x1, x2);
    }
}

double gpmp::ml::SVC::dot_product(const std::vector<double> &x1,
                                  const std::vector<double> &x2) {
    double result = 0.0;
    for (size_t i = 0; i < x1.size(); ++i) {
        result += x1[i] * x2[i];
    }
    return result;
}

double gpmp::ml::SVC::sigmoid(double z) {
    return 1.0 / (1.0 + std::exp(-z));
}

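// Splits num_instances as evenly as possible across k folds, e.g.
// num_instances = 10 with k = 3 yields fold sizes {4, 3, 3}.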
std::vector<int> gpmp::ml::SVC::k_fold_indices(int num_instances, int k) {
    std::vector<int> fold_sizes(k, num_instances / k);
    int remainder = num_instances % k;
    for (int i = 0; i < remainder; ++i) {
        fold_sizes[i]++;
    }
    return fold_sizes;
}

double gpmp::ml::SVC::accuracy(const std::vector<int> &predictions,
                               const std::vector<int> &labels) {
    int correct = 0;
    for (size_t i = 0; i < predictions.size(); ++i) {
        if (predictions[i] == labels[i]) {
            correct++;
        }
    }
    return static_cast<double>(correct) / predictions.size();
}
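
For reference, a minimal usage sketch of the class defined above. The feature values, labels, and constructor arguments are illustrative only; the sketch assumes a program linked against openGPMP with the same header included in this file.

#include <iostream>
#include <vector>
#include <openGPMP/ml/svc.hpp>

int main() {
    // Toy, linearly separable data; labels must be +1 / -1
    std::vector<std::vector<double>> X = {
        {2.0, 3.0}, {1.0, 1.5}, {-2.0, -1.0}, {-1.5, -2.5}};
    std::vector<int> y = {1, 1, -1, -1};

    // C, learning rate, max iterations, tolerance (illustrative values)
    gpmp::ml::SVC clf(1.0, 0.01, 1000, 1e-4);
    clf.fit(X, y);

    std::vector<int> preds = clf.predict(X);
    std::cout << "training accuracy: " << clf.score(X, y) << std::endl;
    return 0;
}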