/*************************************************************************
 *
 * Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
 * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
 * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *      | |
 *      |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 *
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <cmath>
#include <iostream>
#include <numeric>
#include <openGPMP/ml/bayes_net.hpp>

BNN::BNN(int in_size, int h_size, int out_size, double p_variance)
    : input_size(in_size), hidden_size(h_size), output_size(out_size),
      prior_variance(p_variance), rng(std::random_device{}()) {
    // Initialize weights and biases with draws from a standard normal N(0, 1)
    std::normal_distribution<double> normal_distribution(0.0, 1.0);

    input_to_hidden_weights.resize(hidden_size,
                                   std::vector<double>(input_size));
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            input_to_hidden_weights[i][j] = normal_distribution(rng);
        }
    }

    hidden_biases.resize(hidden_size);
    for (int i = 0; i < hidden_size; ++i) {
        hidden_biases[i] = normal_distribution(rng);
    }

    hidden_to_output_weights.resize(output_size,
                                    std::vector<double>(hidden_size));
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            hidden_to_output_weights[i][j] = normal_distribution(rng);
        }
    }

    output_biases.resize(output_size);
    for (int i = 0; i < output_size; ++i) {
        output_biases[i] = normal_distribution(rng);
    }
}

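// Sigmoid activation: sigma(x) = 1 / (1 + e^(-x)). Its derivative,
// sigma'(x) = sigma(x) * (1 - sigma(x)), is the factor the backward pass
// in update_weights() relies on below.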
double BNN::activation_function(double x) {
    // Using sigmoid activation function for simplicity
    return 1.0 / (1.0 + std::exp(-x));
}

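// Runs `epochs` passes of per-sample gradient updates with a fixed
// learning rate of 0.01; the loss reported every 100 epochs is the
// negative log-posterior from compute_loss().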
void BNN::fit(const std::vector<std::vector<double>> &X_train,
              const std::vector<std::vector<double>> &y_train,
              int epochs) {
    const double learning_rate = 0.01;

    for (int epoch = 0; epoch < epochs; ++epoch) {
        update_weights(X_train, y_train, learning_rate);

        // Print loss for monitoring training progress
        if (epoch % 100 == 0) {
            double loss = compute_loss(X_train, y_train);
            std::cout << "Epoch: " << epoch << ", Loss: " << loss << std::endl;
        }
    }
}

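// Single forward pass through the network:
//   hidden_j = sigma(input_to_hidden_weights[j] . x + hidden_biases[j])
//   output_i = sigma(hidden_to_output_weights[i] . hidden + output_biases[i])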
std::vector<double> BNN::predict(const std::vector<double> &input_vector) {
    // Forward pass
    std::vector<double> hidden_output(hidden_size);
    for (int i = 0; i < hidden_size; ++i) {
        hidden_output[i] = activation_function(
            std::inner_product(input_vector.begin(),
                               input_vector.end(),
                               input_to_hidden_weights[i].begin(),
                               0.0) +
            hidden_biases[i]);
    }

    std::vector<double> output(output_size);
    for (int i = 0; i < output_size; ++i) {
        output[i] = activation_function(
            std::inner_product(hidden_output.begin(),
                               hidden_output.end(),
                               hidden_to_output_weights[i].begin(),
                               0.0) +
            output_biases[i]);
    }

    return output;
}

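// Log-density of the zero-mean Gaussian prior, summed over every weight
// and bias:
//   log p(w) = sum_w -0.5 * (w^2 / prior_variance + log(2 * pi * prior_variance))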
double BNN::prior_log_likelihood() {
    // Compute log-likelihood of the prior
    double log_likelihood = 0.0;

    // Prior for input-to-hidden weights
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            log_likelihood +=
                -0.5 *
                (std::pow(input_to_hidden_weights[i][j], 2) / prior_variance +
                 std::log(2 * M_PI * prior_variance));
        }
    }

    // Prior for hidden biases
    for (int i = 0; i < hidden_size; ++i) {
        log_likelihood +=
            -0.5 * (std::pow(hidden_biases[i], 2) / prior_variance +
                    std::log(2 * M_PI * prior_variance));
    }

    // Prior for hidden-to-output weights
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            log_likelihood +=
                -0.5 *
                (std::pow(hidden_to_output_weights[i][j], 2) / prior_variance +
                 std::log(2 * M_PI * prior_variance));
        }
    }

    // Prior for output biases
    for (int i = 0; i < output_size; ++i) {
        log_likelihood +=
            -0.5 * (std::pow(output_biases[i], 2) / prior_variance +
                    std::log(2 * M_PI * prior_variance));
    }

    return log_likelihood;
}

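// Gaussian log-likelihood of the targets given the network outputs, reusing
// prior_variance as the observation-noise variance:
//   log p(y | X, w) = sum_i sum_j -0.5 * (log(2 * pi * prior_variance)
//                                         + (y_ij - output_ij)^2 / prior_variance)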
double BNN::log_likelihood(const std::vector<std::vector<double>> &X,
                           const std::vector<std::vector<double>> &y) {
    // Compute log-likelihood of the data
    double log_likelihood = 0.0;

    for (size_t i = 0; i < X.size(); ++i) {
        // Forward pass
        std::vector<double> hidden_output(hidden_size);
        for (int j = 0; j < hidden_size; ++j) {
            hidden_output[j] = activation_function(
                std::inner_product(X[i].begin(),
                                   X[i].end(),
                                   input_to_hidden_weights[j].begin(),
                                   0.0) +
                hidden_biases[j]);
        }

        std::vector<double> output(output_size);
        for (int j = 0; j < output_size; ++j) {
            output[j] = activation_function(
                std::inner_product(hidden_output.begin(),
                                   hidden_output.end(),
                                   hidden_to_output_weights[j].begin(),
                                   0.0) +
                output_biases[j]);
        }

        // Log-likelihood for Gaussian noise
        for (int j = 0; j < output_size; ++j) {
            log_likelihood +=
                -0.5 * std::log(2 * M_PI) - 0.5 * std::log(prior_variance) -
                0.5 * std::pow(y[i][j] - output[j], 2) / prior_variance;
        }
    }

    return log_likelihood;
}

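// Negative log-posterior (up to an additive constant):
//   loss = -log p(y | X, w) - log p(w)
// Minimising this corresponds to maximum a posteriori (MAP) estimation.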
double BNN::compute_loss(const std::vector<std::vector<double>> &X,
                         const std::vector<std::vector<double>> &y) {
    // Negative log posterior (loss function)
    return -log_likelihood(X, y) - prior_log_likelihood();
}

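// One pass of per-sample gradient updates. For sigmoid output unit j with
// prediction o_j and target y_j, the error term is
//   delta_j = (y_j - o_j) * o_j * (1 - o_j),
// and it is backpropagated to hidden unit k through
// hidden_to_output_weights[j][k]. Note that the prior term of the loss is
// not included in these updates.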
void BNN::update_weights(const std::vector<std::vector<double>> &X,
                         const std::vector<std::vector<double>> &y,
                         double learning_rate) {
    // Update weights using stochastic gradient descent
    for (size_t i = 0; i < X.size(); ++i) {
        // Forward pass
        std::vector<double> hidden_output(hidden_size);
        for (int j = 0; j < hidden_size; ++j) {
            hidden_output[j] = activation_function(
                std::inner_product(X[i].begin(),
                                   X[i].end(),
                                   input_to_hidden_weights[j].begin(),
                                   0.0) +
                hidden_biases[j]);
        }

        std::vector<double> output(output_size);
        for (int j = 0; j < output_size; ++j) {
            output[j] = activation_function(
                std::inner_product(hidden_output.begin(),
                                   hidden_output.end(),
                                   hidden_to_output_weights[j].begin(),
                                   0.0) +
                output_biases[j]);
        }

        // Backward pass (stochastic gradient descent)
        for (int j = 0; j < output_size; ++j) {
            // Gradient for output layer
            double output_gradient =
                (y[i][j] - output[j]) * output[j] * (1.0 - output[j]);

            // Update hidden-to-output weights
            for (int k = 0; k < hidden_size; ++k) {
                hidden_to_output_weights[j][k] +=
                    learning_rate * output_gradient * hidden_output[k];
            }

            // Update output biases
            output_biases[j] += learning_rate * output_gradient;
        }

        // Backward pass for hidden layer
        for (int j = 0; j < hidden_size; ++j) {
            // Gradient for hidden layer, backpropagated through the
            // hidden-to-output weights
            double hidden_gradient = 0.0;
            for (int k = 0; k < output_size; ++k) {
                hidden_gradient += hidden_to_output_weights[k][j] *
                                   (y[i][k] - output[k]) * output[k] *
                                   (1.0 - output[k]);
            }
            hidden_gradient *= hidden_output[j] * (1.0 - hidden_output[j]);

            // Update input-to-hidden weights
            for (int k = 0; k < input_size; ++k) {
                input_to_hidden_weights[j][k] +=
                    learning_rate * hidden_gradient * X[i][k];
            }

            // Update hidden biases
            hidden_biases[j] += learning_rate * hidden_gradient;
        }
    }
}
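
/* Usage sketch (illustrative only; the toy XOR data below is hypothetical
 * and not part of the library):
 *
 *   BNN net(2, 4, 1, 1.0);
 *   std::vector<std::vector<double>> X = {{0.0, 0.0}, {0.0, 1.0},
 *                                         {1.0, 0.0}, {1.0, 1.0}};
 *   std::vector<std::vector<double>> y = {{0.0}, {1.0}, {1.0}, {0.0}};
 *   net.fit(X, y, 1000);               // prints the loss every 100 epochs
 *   std::vector<double> out = net.predict({1.0, 0.0});
 */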