/*************************************************************************
 *
 *  Project
 *                          _____ _____  __  __ _____
 *                         / ____|  __ \|  \/  |  __ \
 *   ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
 *  / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 * | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *  \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *       | |
 *       |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <openGPMP/ml/logreg.hpp>
#include <stdexcept>
#include <vector>

gpmp::ml::LogReg::LogReg(double l_rate, int num_iters, double lda)
    : learning_rate(l_rate), num_iterations(num_iters), lambda(lda) {
}

gpmp::ml::LogReg::~LogReg() {
}

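// Fits the model with batch gradient descent on the cross-entropy loss,
// with L2 (ridge) regularization controlled by `lambda`. A bias term is
// prepended to every sample, so weights[0] is the intercept.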
void gpmp::ml::LogReg::train(const std::vector<std::vector<double>> &X_train,
                             const std::vector<int> &y_train) {
    if (X_train.empty() || X_train.size() != y_train.size()) {
        throw std::invalid_argument(
            "X_train must be non-empty and match y_train in length.");
    }

    // Initialize weights to zeros (index 0 is the bias term)
    weights.assign(X_train[0].size() + 1, 0.0);

    for (int iter = 0; iter < num_iterations; ++iter) {
        std::vector<double> gradient(X_train[0].size() + 1, 0.0);

        for (size_t i = 0; i < X_train.size(); ++i) {
            // Add bias term to input
            std::vector<double> input = {1.0};
            input.insert(input.end(), X_train[i].begin(), X_train[i].end());

            // Compute the predicted value
            double predicted = sigmoid(std::inner_product(input.begin(),
                                                          input.end(),
                                                          weights.begin(),
                                                          0.0));

            // Accumulate the gradient for each weight
            for (size_t j = 0; j < gradient.size(); ++j) {
                gradient[j] += (predicted - y_train[i]) * input[j];
            }
        }

        // Update weights using gradient descent; the bias term (j == 0)
        // is conventionally excluded from L2 regularization
        for (size_t j = 0; j < weights.size(); ++j) {
            double reg = (j == 0) ? 0.0 : lambda * weights[j];
            weights[j] -=
                learning_rate * (gradient[j] / X_train.size() + reg);
        }
    }
}

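// Returns the predicted probability of the positive class for each row
// of X_test, i.e. sigmoid(w . [1, x]).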
std::vector<double>
gpmp::ml::LogReg::predict(const std::vector<std::vector<double>> &X_test) {
    std::vector<double> predictions;
    for (size_t i = 0; i < X_test.size(); ++i) {
        // Add bias term to input
        std::vector<double> input = {1.0};
        input.insert(input.end(), X_test[i].begin(), X_test[i].end());

        // Compute the predicted value
        double predicted = sigmoid(std::inner_product(input.begin(),
                                                      input.end(),
                                                      weights.begin(),
                                                      0.0));
        predictions.push_back(predicted);
    }
    return predictions;
}

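// Computes classification accuracy on a labeled test set, thresholding
// the predicted probabilities at 0.5.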
double
gpmp::ml::LogReg::accuracy(const std::vector<std::vector<double>> &X_test,
                           const std::vector<int> &y_test) {
    std::vector<double> predictions = predict(X_test);
    int correct = 0;
    for (size_t i = 0; i < predictions.size(); ++i) {
        if ((predictions[i] >= 0.5 && y_test[i] == 1) ||
            (predictions[i] < 0.5 && y_test[i] == 0)) {
            correct++;
        }
    }
    return static_cast<double>(correct) / y_test.size();
}

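// Logistic function: sigmoid(z) = 1 / (1 + e^(-z)), mapping any real
// input into the open interval (0, 1).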
double gpmp::ml::LogReg::sigmoid(double z) {
    return 1.0 / (1.0 + std::exp(-z));
}

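// Min-max scaling: rescales each feature column of X into [0, 1].
// Columns whose values are all identical are left unchanged to avoid
// division by zero.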
void gpmp::ml::LogReg::feature_scaling(std::vector<std::vector<double>> &X) {
    if (X.empty()) {
        throw std::invalid_argument("Input feature matrix is empty.");
    }

    size_t num_features = X[0].size();
    for (size_t j = 0; j < num_features; ++j) {
        double min_val = X[0][j], max_val = X[0][j];
        for (size_t i = 1; i < X.size(); ++i) {
            if (X[i][j] < min_val) {
                min_val = X[i][j];
            }
            if (X[i][j] > max_val) {
                max_val = X[i][j];
            }
        }

        if (std::fabs(max_val - min_val) <
            std::numeric_limits<double>::epsilon()) {
            continue; // Skip if all values in the column are the same
        }

        double range = max_val - min_val;
        for (size_t i = 0; i < X.size(); ++i) {
            X[i][j] = (X[i][j] - min_val) / range;
        }
    }
}

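// Mean cross-entropy (negative log-likelihood) of the current weights
// over a labeled dataset. Note that the L2 penalty applied during
// training is not included here.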
double
gpmp::ml::LogReg::cost_function(const std::vector<std::vector<double>> &X,
                                const std::vector<int> &y) {
    double cost = 0.0;
    for (size_t i = 0; i < X.size(); ++i) {
        // Add bias term to input
        std::vector<double> input = {1.0};
        input.insert(input.end(), X[i].begin(), X[i].end());

        double predicted = sigmoid(std::inner_product(input.begin(),
                                                      input.end(),
                                                      weights.begin(),
                                                      0.0));

        // Clamp the prediction to avoid log(0) when the sigmoid saturates
        const double eps = 1e-12;
        double p = std::min(std::max(predicted, eps), 1.0 - eps);
        cost += -y[i] * std::log(p) - (1 - y[i]) * std::log(1 - p);
    }
    cost /= X.size();
    return cost;
}

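// Assigns a hard 0/1 label to each row of X by thresholding the
// predicted probability at 0.5.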
std::vector<int>
gpmp::ml::LogReg::classify(const std::vector<std::vector<double>> &X) {
    std::vector<int> classifications;
    for (size_t i = 0; i < X.size(); ++i) {
        // Add bias term to input
        std::vector<double> input = {1.0};
        input.insert(input.end(), X[i].begin(), X[i].end());

        // Compute the predicted value
        double predicted = sigmoid(std::inner_product(input.begin(),
                                                      input.end(),
                                                      weights.begin(),
                                                      0.0));
        int classification = predicted >= 0.5 ? 1 : 0;
        classifications.push_back(classification);
    }
    return classifications;
}
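
// ---------------------------------------------------------------------
// Example usage: a minimal sketch, not part of the library. The tiny
// dataset and hyperparameter values below are hypothetical and exist
// only to illustrate the API defined above. Guarded so it does not
// interfere with normal builds; compile with -DLOGREG_EXAMPLE to try it.
// ---------------------------------------------------------------------
#ifdef LOGREG_EXAMPLE
#include <iostream>

int main() {
    // Four samples with two features each; labels are 0/1
    std::vector<std::vector<double>> X = {{1.0, 20.0},
                                          {2.0, 30.0},
                                          {8.0, 70.0},
                                          {9.0, 90.0}};
    std::vector<int> y = {0, 0, 1, 1};

    // learning rate 0.1, 1000 iterations, L2 strength 0.01 (hypothetical)
    gpmp::ml::LogReg model(0.1, 1000, 0.01);
    model.feature_scaling(X); // rescale features into [0, 1]
    model.train(X, y);

    std::cout << "training accuracy: " << model.accuracy(X, y) << "\n";
    std::cout << "cost: " << model.cost_function(X, y) << "\n";
    return 0;
}
#endif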