/*************************************************************************
 *
 * Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *   ___  _ __   ___ _ __| |  __| |__) | \  / | |__) |
 *  / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 * | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *  \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *       | |
 *       |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 *
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <cmath>
#include <iostream>
#include <numeric>
#include <openGPMP/ml/svc.hpp>

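// Construct a linear SVC. C_ scales the L2 penalty term, l_rate is the
// gradient step size, max_iters caps the training iterations, and tol is
// the loss threshold used to stop early.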
gpmp::ml::SVC::SVC(double C_, double l_rate, int max_iters, double tol)
    : C(C_), learning_rate(l_rate), max_iterations(max_iters), tolerance(tol) {
}

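// Train the classifier on X_train/y_train (labels in {-1, +1}) by repeatedly
// applying subgradient updates to the hinge loss with an L2 penalty, stopping
// early once the regularized loss drops below `tolerance`.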
void gpmp::ml::SVC::fit(const std::vector<std::vector<double>> &X_train,
                        const std::vector<int> &y_train) {
    // Initialize weights and bias
    weights.resize(X_train[0].size(), 0.0);
    bias = 0.0;

    // Stochastic Gradient Descent
    for (int iter = 0; iter < max_iterations; ++iter) {
        update_weights(X_train, y_train);

        // Check convergence
        double loss = compute_loss(X_train, y_train);
        if (loss < tolerance) {
            break;
        }
    }
}

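// Predict a class label for each row of X_test from the sign of the linear
// decision function w.x + b: +1 for non-negative scores, -1 otherwise.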
std::vector<int>
gpmp::ml::SVC::predict(const std::vector<std::vector<double>> &X_test) {
    std::vector<int> predictions;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        int prediction = (score >= 0) ? 1 : -1;
        predictions.push_back(prediction);
    }
    return predictions;
}

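// Hinge loss for a single sample: max(0, 1 - y * f(x)), where f(x) is the
// raw decision value and y is the {-1, +1} label.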
double gpmp::ml::SVC::hinge_loss(double prediction, int label) {
    return std::fmax(0.0, 1 - label * prediction);
}

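// Mean objective over the dataset: hinge loss per sample plus the
// 0.5 * C * ||w||^2 regularization term, divided by the number of samples.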
double gpmp::ml::SVC::compute_loss(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    double loss = 0.0;
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        loss += hinge_loss(prediction, y[i]);
    }
    // Add L2 regularization
    for (double weight : weights) {
        loss += 0.5 * C * weight * weight;
    }
    return loss / X.size();
}

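// One pass of per-sample (stochastic) subgradient descent: samples that
// violate the margin contribute -y * x to the weight gradient and -y to the
// bias gradient, alongside the C * w regularization term.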
void gpmp::ml::SVC::update_weights(const std::vector<std::vector<double>> &X,
                                   const std::vector<int> &y) {
    for (size_t i = 0; i < X.size(); ++i) {
        double prediction = 0.0;
        for (size_t j = 0; j < X[i].size(); ++j) {
            prediction += X[i][j] * weights[j];
        }
        prediction += bias;
        // The hinge loss is active when the functional margin y * f(x) < 1
        if (y[i] * prediction < 1) {
            // Update weights: L2 regularization term plus hinge subgradient
            for (size_t j = 0; j < X[i].size(); ++j) {
                weights[j] -= learning_rate * (C * weights[j] - y[i] * X[i][j]);
            }
            // Update bias: subgradient of the hinge term w.r.t. b is -y
            bias += learning_rate * y[i];
        }
    }
}

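// Map each decision value through a sigmoid to obtain a score in (0, 1).
// Note these are uncalibrated confidence scores, not fitted probabilities.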
std::vector<double>
gpmp::ml::SVC::predict_proba(const std::vector<std::vector<double>> &X_test) {
    std::vector<double> probabilities;
    for (const auto &instance : X_test) {
        double score = 0.0;
        for (size_t i = 0; i < instance.size(); ++i) {
            score += instance[i] * weights[i];
        }
        score += bias;
        double prob = sigmoid(score);
        probabilities.push_back(prob);
    }
    return probabilities;
}

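// Mean accuracy of the classifier on X_test against the true labels y_test.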
double gpmp::ml::SVC::score(const std::vector<std::vector<double>> &X_test,
                            const std::vector<int> &y_test) {
    std::vector<int> predictions = predict(X_test);
    return accuracy(predictions, y_test);
}

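// Simple setters for model configuration.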
void gpmp::ml::SVC::set_kernel(const std::string &k_type) {
    this->kernel_type = k_type;
}

void gpmp::ml::SVC::set_kernel_parameters(double k_param) {
    this->kernel_param = k_param;
}

void gpmp::ml::SVC::set_random_state(int seed) {
    this->random_state = seed;
}

void gpmp::ml::SVC::set_verbose(bool vbose) {
    this->verbose = vbose;
}

void gpmp::ml::SVC::set_penalty(const std::string &p_type) {
    this->penalty_type = p_type;
}

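// k-fold cross-validation: split the data into `cv` contiguous folds, train
// on all but one fold, score on the held-out fold, and return the mean
// accuracy across folds. The data is not shuffled before splitting.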
double gpmp::ml::SVC::cross_val_score(const std::vector<std::vector<double>> &X,
                                      const std::vector<int> &y,
                                      int cv) {
    std::vector<int> fold_sizes = k_fold_indices(X.size(), cv);
    double avg_score = 0.0;
    for (int i = 0; i < cv; ++i) {
        std::vector<std::vector<double>> X_train, X_valid;
        std::vector<int> y_train, y_valid;
        int start = 0;
        for (int j = 0; j < cv; ++j) {
            if (j != i) {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_train.push_back(X[k]);
                    y_train.push_back(y[k]);
                }
            } else {
                int end = start + fold_sizes[j];
                for (int k = start; k < end; ++k) {
                    X_valid.push_back(X[k]);
                    y_valid.push_back(y[k]);
                }
            }
            start += fold_sizes[j];
        }
        fit(X_train, y_train);
        double score_val = score(X_valid, y_valid);
        if (verbose) {
            std::cout << "Cross-validation fold " << i + 1
                      << " accuracy: " << score_val << std::endl;
        }
        avg_score += score_val;
    }
    return avg_score / cv;
}

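// Exhaustive search over candidate C values and kernel parameters, scoring
// each pair with cross-validation and returning {C, kernel_param} for the
// best-scoring combination.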
std::vector<double>
gpmp::ml::SVC::grid_search(const std::vector<std::vector<double>> &X,
                           const std::vector<int> &y,
                           const std::vector<double> &C_values,
                           const std::vector<double> &kernel_params,
                           int cv) {
    std::vector<double> best_params;
    double best_score = 0.0;
    for (double val : C_values) {
        for (double param : kernel_params) {
            C = val; // apply the candidate regularization strength
            set_kernel_parameters(param);
            set_penalty("l2");  // Default penalty type
            set_verbose(false); // Suppress verbose output
            double score = cross_val_score(X, y, cv);
            if (score > best_score) {
                best_score = score;
                best_params = {val, param};
            }
        }
    }
    return best_params;
}

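// Kernel evaluation between two samples; only the linear kernel (dot product)
// is currently implemented, and unknown kernel types fall back to it.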
double gpmp::ml::SVC::kernel(const std::vector<double> &x1,
                             const std::vector<double> &x2) {
    if (kernel_type == "linear") {
        return dot_product(x1, x2);
    } else {
        // Default to linear kernel if unknown kernel type
        return dot_product(x1, x2);
    }
}

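// Dot product of two equally sized feature vectors.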
double gpmp::ml::SVC::dot_product(const std::vector<double> &x1,
                                  const std::vector<double> &x2) {
    double result = 0.0;
    for (size_t i = 0; i < x1.size(); ++i) {
        result += x1[i] * x2[i];
    }
    return result;
}

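// Logistic sigmoid 1 / (1 + e^(-z)).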
double gpmp::ml::SVC::sigmoid(double z) {
    return 1.0 / (1.0 + std::exp(-z));
}

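// Compute the size of each of the k folds, distributing any remainder one
// extra sample at a time to the first folds.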
std::vector<int> gpmp::ml::SVC::k_fold_indices(int num_instances, int k) {
    std::vector<int> fold_sizes(k, num_instances / k);
    int remainder = num_instances % k;
    for (int i = 0; i < remainder; ++i) {
        fold_sizes[i]++;
    }
    return fold_sizes;
}

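// Fraction of predictions that match the ground-truth labels.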
double gpmp::ml::SVC::accuracy(const std::vector<int> &predictions,
                               const std::vector<int> &labels) {
    int correct = 0;
    for (size_t i = 0; i < predictions.size(); ++i) {
        if (predictions[i] == labels[i]) {
            correct++;
        }
    }
    return static_cast<double>(correct) / predictions.size();
}