openGPMP
Open Source Mathematics Package
bayes_net.cpp
/*************************************************************************
 *
 * Project
 *                         _____ _____  __  __ _____
 *                        / ____|  __ \|  \/  |  __ \
 *   ___  _ __   ___ _ __| |  __| |__) | \  / | |__) |
 *  / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
 * | (_) | |_) |  __/ | | | |__| | |    | |  | | |
 *  \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
 *       | |
 *       |_|
 *
 * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
 *
 * This software is licensed as described in the file LICENSE, which
 * you should have received as part of this distribution. The terms
 * among other details are referenced in the official documentation
 * seen here : https://akielaries.github.io/openGPMP/ along with
 * important files seen in this project.
 *
 * You may opt to use, copy, modify, merge, publish, distribute
 * and/or sell copies of the Software, and permit persons to whom
 * the Software is furnished to do so, under the terms of the
 * LICENSE file. As this is an Open Source effort, all implementations
 * must be of the same methodology.
 *
 * This software is distributed on an AS IS basis, WITHOUT
 * WARRANTY OF ANY KIND, either express or implied.
 *
 ************************************************************************/
#include <cmath>
#include <iostream>
#include <numeric>
#include <openGPMP/ml/bayes_net.hpp>
#include <random>
#include <vector>

BNN::BNN(int in_size, int h_size, int out_size, double p_variance)
    : input_size(in_size), hidden_size(h_size), output_size(out_size),
      prior_variance(p_variance), rng(std::random_device{}()) {
    // initialize weights and biases with random values
    std::normal_distribution<double> normal_distribution(0.0, 1.0);

    input_to_hidden_weights.resize(hidden_size,
                                   std::vector<double>(input_size));
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            input_to_hidden_weights[i][j] = normal_distribution(rng);
        }
    }

    hidden_biases.resize(hidden_size);
    for (int i = 0; i < hidden_size; ++i) {
        hidden_biases[i] = normal_distribution(rng);
    }

    hidden_to_output_weights.resize(output_size,
                                    std::vector<double>(hidden_size));
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            hidden_to_output_weights[i][j] = normal_distribution(rng);
        }
    }

    output_biases.resize(output_size);
    for (int i = 0; i < output_size; ++i) {
        output_biases[i] = normal_distribution(rng);
    }
}

double BNN::activation_function(double x) {
    // Using sigmoid activation function for simplicity
    return 1.0 / (1.0 + std::exp(-x));
}

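// The backward pass in update_weights() relies on the sigmoid identity
// s'(x) = s(x) * (1 - s(x)), which appears in the code as terms of the
// form output[j] * (1.0 - output[j]).
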
void BNN::fit(const std::vector<std::vector<double>> &X_train,
              const std::vector<std::vector<double>> &y_train,
              int epochs) {
    const double learning_rate = 0.01;

    for (int epoch = 0; epoch < epochs; ++epoch) {
        update_weights(X_train, y_train, learning_rate);

        // Print loss for monitoring training progress
        if (epoch % 100 == 0) {
            double loss = compute_loss(X_train, y_train);
            std::cout << "Epoch: " << epoch << ", Loss: " << loss << std::endl;
        }
    }
}

std::vector<double> BNN::predict(const std::vector<double> &input_vector) {
    // Forward pass
    std::vector<double> hidden_output(hidden_size);
    for (int i = 0; i < hidden_size; ++i) {
        hidden_output[i] = activation_function(
            std::inner_product(input_vector.begin(),
                               input_vector.end(),
                               input_to_hidden_weights[i].begin(),
                               0.0) +
            hidden_biases[i]);
    }

    std::vector<double> output(output_size);
    for (int i = 0; i < output_size; ++i) {
        output[i] = activation_function(
            std::inner_product(hidden_output.begin(),
                               hidden_output.end(),
                               hidden_to_output_weights[i].begin(),
                               0.0) +
            output_biases[i]);
    }

    return output;
}

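// In matrix form, the forward pass above computes h = s(W1 * x + b1) and
// y_hat = s(W2 * h + b2), where s is the sigmoid, W1 is
// input_to_hidden_weights, and W2 is hidden_to_output_weights.
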
double BNN::prior_log_likelihood() {
    // Compute log-likelihood of the prior
    double log_likelihood = 0.0;

    // Prior for input-to-hidden weights
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            log_likelihood +=
                -0.5 *
                (std::pow(input_to_hidden_weights[i][j], 2) / prior_variance +
                 std::log(2 * M_PI * prior_variance));
        }
    }

    // Prior for hidden biases
    for (int i = 0; i < hidden_size; ++i) {
        log_likelihood +=
            -0.5 * (std::pow(hidden_biases[i], 2) / prior_variance +
                    std::log(2 * M_PI * prior_variance));
    }

    // Prior for hidden-to-output weights
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            log_likelihood +=
                -0.5 *
                (std::pow(hidden_to_output_weights[i][j], 2) / prior_variance +
                 std::log(2 * M_PI * prior_variance));
        }
    }

    // Prior for output biases
    for (int i = 0; i < output_size; ++i) {
        log_likelihood +=
            -0.5 * (std::pow(output_biases[i], 2) / prior_variance +
                    std::log(2 * M_PI * prior_variance));
    }

    return log_likelihood;
}

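// Each term accumulated above is the log-density of a zero-mean Gaussian,
// log N(w; 0, v) = -0.5 * (w^2 / v + log(2 * pi * v)), summed over every
// weight and bias with v = prior_variance.
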
double BNN::log_likelihood(const std::vector<std::vector<double>> &X,
                           const std::vector<std::vector<double>> &y) {
    // Compute log-likelihood of the data
    double log_likelihood = 0.0;

    for (size_t i = 0; i < X.size(); ++i) {
        // Forward pass
        std::vector<double> hidden_output(hidden_size);
        for (int j = 0; j < hidden_size; ++j) {
            hidden_output[j] = activation_function(
                std::inner_product(X[i].begin(),
                                   X[i].end(),
                                   input_to_hidden_weights[j].begin(),
                                   0.0) +
                hidden_biases[j]);
        }

        std::vector<double> output(output_size);
        for (int j = 0; j < output_size; ++j) {
            output[j] = activation_function(
                std::inner_product(hidden_output.begin(),
                                   hidden_output.end(),
                                   hidden_to_output_weights[j].begin(),
                                   0.0) +
                output_biases[j]);
        }

        // Log-likelihood for Gaussian noise
        for (int j = 0; j < output_size; ++j) {
            log_likelihood +=
                -0.5 * std::log(2 * M_PI) - 0.5 * std::log(prior_variance) -
                0.5 * std::pow(y[i][j] - output[j], 2) / prior_variance;
        }
    }

    return log_likelihood;
}

double BNN::compute_loss(const std::vector<std::vector<double>> &X,
                         const std::vector<std::vector<double>> &y) {
    // Negative log posterior (loss function)
    return -log_likelihood(X, y) - prior_log_likelihood();
}

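// By Bayes' rule this is the negative log posterior up to an additive
// constant: -log p(theta | D) = -log p(D | theta) - log p(theta) + const.
// Note that update_weights() below ascends only the data term; the prior
// contributes to the reported loss but not to the gradient updates.
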
void BNN::update_weights(const std::vector<std::vector<double>> &X,
                         const std::vector<std::vector<double>> &y,
                         double learning_rate) {
    // Update weights using stochastic gradient descent
    for (size_t i = 0; i < X.size(); ++i) {
        // Forward pass
        std::vector<double> hidden_output(hidden_size);
        for (int j = 0; j < hidden_size; ++j) {
            hidden_output[j] = activation_function(
                std::inner_product(X[i].begin(),
                                   X[i].end(),
                                   input_to_hidden_weights[j].begin(),
                                   0.0) +
                hidden_biases[j]);
        }

        std::vector<double> output(output_size);
        for (int j = 0; j < output_size; ++j) {
            output[j] = activation_function(
                std::inner_product(hidden_output.begin(),
                                   hidden_output.end(),
                                   hidden_to_output_weights[j].begin(),
                                   0.0) +
                output_biases[j]);
        }

        // Backward pass (stochastic gradient descent)
        for (int j = 0; j < output_size; ++j) {
            // Gradient for output layer
            double output_gradient =
                (y[i][j] - output[j]) * output[j] * (1.0 - output[j]);

            // Update hidden-to-output weights
            for (int k = 0; k < hidden_size; ++k) {
                hidden_to_output_weights[j][k] +=
                    learning_rate * output_gradient * hidden_output[k];
            }

            // Update output biases
            output_biases[j] += learning_rate * output_gradient;
        }

        // Backward pass for hidden layer
        for (int j = 0; j < hidden_size; ++j) {
            // Gradient for hidden layer: backpropagate the output error
            // through the hidden-to-output weights
            double hidden_gradient = 0.0;
            for (int k = 0; k < output_size; ++k) {
                hidden_gradient += hidden_to_output_weights[k][j] *
                                   (y[i][k] - output[k]) * output[k] *
                                   (1.0 - output[k]);
            }
            hidden_gradient *= hidden_output[j] * (1.0 - hidden_output[j]);

            // Update input-to-hidden weights
            for (int k = 0; k < input_size; ++k) {
                input_to_hidden_weights[j][k] +=
                    learning_rate * hidden_gradient * X[i][k];
            }

            // Update hidden biases
            hidden_biases[j] += learning_rate * hidden_gradient;
        }
    }
}
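
For reference, a minimal usage sketch. It assumes bayes_net.hpp declares the
BNN class with the public interface used above; the main() driver, the
include path, and the toy XOR-style dataset are illustrative assumptions,
not part of openGPMP.

#include <openGPMP/ml/bayes_net.hpp> // assumed install path of the header
#include <iostream>
#include <vector>

int main() {
    // 2 inputs, 4 hidden units, 1 output, unit prior variance
    BNN net(2, 4, 1, 1.0);

    // Toy XOR-style dataset (illustrative only)
    std::vector<std::vector<double>> X = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<std::vector<double>> y = {{0}, {1}, {1}, {0}};

    // Train for 1000 epochs; fit() logs the loss every 100 epochs
    net.fit(X, y, 1000);

    // Print the network's prediction for each input
    for (const auto &x : X) {
        std::cout << net.predict(x)[0] << std::endl;
    }
    return 0;
}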