virtual void save(const std::string &filename) const {
    std::ofstream file(filename, std::ios::out | std::ios::binary);
    if (file.is_open()) {
        // Rows of a nested std::vector are not contiguous in memory,
        // so each weight row is written separately.
        for (const auto &row : weights_input_hidden) {
            file.write(reinterpret_cast<const char *>(row.data()),
                       row.size() * sizeof(double));
        }
        for (const auto &row : weights_hidden_output) {
            file.write(reinterpret_cast<const char *>(row.data()),
                       row.size() * sizeof(double));
        }
        std::cout << "Model saved successfully." << std::endl;
    } else {
        std::cerr << "Unable to open the file for saving." << std::endl;
    }
}
virtual void load(const std::string &filename) {
    std::ifstream file(filename, std::ios::in | std::ios::binary);
    if (file.is_open()) {
        // Read the weights back row by row, mirroring save().
        for (auto &row : weights_input_hidden) {
            file.read(reinterpret_cast<char *>(row.data()),
                      row.size() * sizeof(double));
        }
        for (auto &row : weights_hidden_output) {
            file.read(reinterpret_cast<char *>(row.data()),
                      row.size() * sizeof(double));
        }
        std::cout << "Model loaded successfully." << std::endl;
    } else {
        std::cerr << "Unable to open the file for loading." << std::endl;
    }
}
void lrate_set(double initial_rate) { learning_rate = initial_rate; }

virtual void lrate_update(int epoch) {
    const int decay_interval = 10;
    if (epoch % decay_interval == 0) {
        learning_rate *= 0.9;  // assumed decay factor; the original was elided
        std::cout << "Learning rate updated to: " << learning_rate
                  << " at epoch " << epoch << std::endl;
    }
}
AutoEncoder(int in_size, int h_size, int out_size, double l_rate)
    : input_size(in_size), hidden_size(h_size), output_size(out_size),
      learning_rate(l_rate) {
    // Shape both weight matrices; the original random-initialization body
    // was elided in this listing.
    weights_input_hidden.assign(in_size, std::vector<double>(h_size, 0.0));
    weights_hidden_output.assign(h_size, std::vector<double>(out_size, 0.0));
}
std::vector<double> sigmoid(const std::vector<double> &x) {
    std::vector<double> result;
    result.reserve(x.size());
    for (double val : x) {
        result.push_back(1.0 / (1.0 + exp(-val)));
    }
    return result;
}
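// The logistic sigmoid and its derivative, which is where the
// h_i (1 - h_i) factors in every weight update below come from:
//
//   \sigma(x) = \frac{1}{1 + e^{-x}}, \qquad
//   \sigma'(x) = \sigma(x) (1 - \sigma(x))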
std::vector<double> forward(const std::vector<double> &input) {
    std::vector<double> hidden(hidden_size);
    std::vector<double> output(output_size);

    // Input layer -> hidden layer.
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            hidden[i] += input[j] * weights_input_hidden[j][i];
        }
        hidden[i] = sigmoid({hidden[i]})[0];
    }

    // Hidden layer -> output layer.
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            output[i] += hidden[j] * weights_hidden_output[j][i];
        }
        output[i] = sigmoid({output[i]})[0];
    }
    return output;
}
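// In matrix form, with W_ih = weights_input_hidden and
// W_ho = weights_hidden_output, the pass computes
//
//   h = \sigma(W_{ih}^{\top} x), \qquad \hat{x} = \sigma(W_{ho}^{\top} h)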
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) {
    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            std::vector<double> hidden(hidden_size);
            std::vector<double> output(output_size);

            // Forward pass: input -> hidden.
            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    hidden[i] += input[j] * weights_input_hidden[j][i];
                }
                hidden[i] = sigmoid({hidden[i]})[0];
            }

            // Forward pass: hidden -> output.
            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    output[i] += hidden[j] * weights_hidden_output[j][i];
                }
                output[i] = sigmoid({output[i]})[0];
            }

            // Backward pass: hidden -> output weights.
            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (output[i] - input[i]) * hidden[j];
                }
            }

            // Backward pass: input -> hidden weights.
            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (output[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    weights_input_hidden[j][i] -= learning_rate * error *
                                                  input[j] * (1 - hidden[i]) *
                                                  hidden[i];
                }
            }
        }
    }
}
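// Both updates are gradient-descent steps on the squared reconstruction
// error E = \tfrac{1}{2} \sum_k (\hat{x}_k - x_k)^2, with the output-layer
// sigmoid derivative omitted (as the code does):
//
//   \Delta w^{ho}_{ji} = -\eta (\hat{x}_i - x_i) h_j
//   \Delta w^{ih}_{ji} = -\eta \Big( \sum_k (\hat{x}_k - x_k) w^{ho}_{ik} \Big)
//                        x_j h_i (1 - h_i)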
void display() {
    std::cout << "Input to Hidden Weights:\n";
    for (int i = 0; i < input_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            std::cout << weights_input_hidden[i][j] << " ";
        }
        std::cout << "\n";
    }

    std::cout << "\nHidden to Output Weights:\n";
    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < output_size; ++j) {
            std::cout << weights_hidden_output[i][j] << " ";
        }
        std::cout << "\n";
    }
}
// SparseAutoEncoder: penalizes hidden activations that drift away from a
// target average activation, encouraging sparse codes.
SparseAutoEncoder(int in_size, int h_size, int out_size, double l_rate,
                  double s_weight, double s_target)
    : AutoEncoder(in_size, h_size, out_size, l_rate),
      sparsity_weight(s_weight), sparsity_target(s_target) {}
void train(const std::vector<std::vector<double>> &training_data,
           int epochs) override {
    const double SPARSITY_TARGET_DECAY = 0.1;

    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &current_input : training_data) {
            // NOTE: forward() returns the reconstruction; this code reuses
            // it as the "hidden" activations.
            std::vector<double> hidden = forward(current_input);

            // Update hidden -> output weights.
            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (hidden[i] - current_input[i]) *
                        hidden[j];
                }
            }

            // Update input -> hidden weights with a sparsity penalty.
            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (hidden[k] - current_input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    double sparsity_term =
                        sparsity_weight * (sparsity_target - hidden[i]);
                    weights_input_hidden[j][i] -=
                        learning_rate * (error + sparsity_term) *
                        current_input[j] * (1 - hidden[i]) * hidden[i];
                }
            }
        }

        // Track each hidden unit's average activation and pull the
        // sparsity target toward it with an exponential moving average.
        for (int i = 0; i < hidden_size; ++i) {
            double average_activation = 0.0;
            for (const auto &input : training_data) {
                std::vector<double> current_hidden = forward(input);
                average_activation += current_hidden[i];
            }
            average_activation /= training_data.size();
            sparsity_target =
                (1.0 - SPARSITY_TARGET_DECAY) * sparsity_target +
                SPARSITY_TARGET_DECAY * average_activation;
        }
    }
}
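// The running sparsity target is an exponential moving average of each
// unit's mean activation \bar{h}_i, with decay \lambda = 0.1:
//
//   \rho \leftarrow (1 - \lambda) \rho + \lambda \bar{h}_i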
// DenoisingAutoEncoder: trains on corrupted inputs so the network learns
// to reconstruct the clean signal.
DenoisingAutoEncoder(int in_size, int h_size, int out_size, double l_rate,
                     double c_level)
    : AutoEncoder(in_size, h_size, out_size, l_rate),
      corruption_level(c_level) {}
void train(const std::vector<std::vector<double>> &training_data,
           int epochs) override {
    std::srand(std::time(0));

    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            // Corrupt the input: zero each value with probability
            // corruption_level (assumed masking noise).
            std::vector<double> noisy_input = input;
            for (double &val : noisy_input) {
                if ((std::rand() / (RAND_MAX + 1.0)) < corruption_level) {
                    val = 0.0;
                }
            }

            std::vector<double> hidden = forward(noisy_input);

            // Update hidden -> output weights against the clean input.
            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (hidden[i] - input[i]) * hidden[j];
                }
            }

            // Update input -> hidden weights.
            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (hidden[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    weights_input_hidden[j][i] -=
                        learning_rate * error *
                        noisy_input[j] *  // gradient w.r.t. the corrupted input (assumed)
                        (1 - hidden[i]) * hidden[i];
                }
            }
        }
    }
}
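// The C-style std::rand() above works, but <random> gives a cleaner,
// seedable equivalent. A minimal alternative sketch (corrupt() is a
// hypothetical helper, not part of the original source; requires <random>):
void corrupt(std::vector<double> &v, double p, std::mt19937 &rng) {
    std::bernoulli_distribution drop(p);  // true with probability p
    for (double &val : v) {
        if (drop(rng)) {
            val = 0.0;  // masking noise
        }
    }
}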
// ContractiveAutoEncoder: adds a penalty on the encoder's sensitivity to
// input perturbations.
ContractiveAutoEncoder(int in_size, int h_size, int out_size, double l_rate,
                       double c_weight)
    : AutoEncoder(in_size, h_size, out_size, l_rate),
      contractive_weight(c_weight) {}
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            std::vector<double> hidden = forward(input);

            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (hidden[i] - input[i]) * hidden[j];
                }
            }

            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (hidden[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    // Penalize the sigmoid's local sensitivity h (1 - h).
                    double contractive_term =
                        contractive_weight * hidden[i] * (1 - hidden[i]);
                    weights_input_hidden[j][i] -=
                        learning_rate * (error + contractive_term) * input[j] *
                        (1 - hidden[i]) * hidden[i];
                }
            }
        }
    }
}
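// The full contractive penalty is the squared Frobenius norm of the
// encoder Jacobian; for a sigmoid encoder,
//
//   \|J_f(x)\|_F^2 = \sum_i \big( h_i (1 - h_i) \big)^2 \sum_j w_{ji}^2
//
// of which the update above keeps only the per-unit factor h_i (1 - h_i)
// as a simplified term.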
// MDLAutoEncoder: regularizes with a minimum-description-length style term.
MDLAutoEncoder(int in_size, int h_size, int out_size, double l_rate,
               double m_wt)
    : AutoEncoder(in_size, h_size, out_size, l_rate), mdl_weight(m_wt) {}
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            std::vector<double> hidden = forward(input);

            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (hidden[i] - input[i]) * hidden[j];
                }
            }

            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (hidden[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    double mdl_term = mdl_weight * log(1.0 + fabs(error));
                    weights_input_hidden[j][i] -=
                        learning_rate * (error + mdl_term) * input[j] *
                        (1 - hidden[i]) * hidden[i];
                }
            }
        }
    }
}
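// The regularizer grows logarithmically with the backpropagated error
// \delta_i, a description-length-style cost that penalizes large errors
// sub-linearly:
//
//   \text{mdl\_term} = \lambda_{mdl} \log(1 + |\delta_i|)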
// ConcreteAutoEncoder: samples hidden activations with a Gumbel/concrete
// relaxation controlled by a temperature parameter.
ConcreteAutoEncoder(int in_size, int h_size, int out_size, double l_rate,
                    double temp)
    : AutoEncoder(in_size, h_size, out_size, l_rate), temperature(temp) {}
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    std::default_random_engine generator;
    std::uniform_real_distribution<double> uniform_distribution(0.0, 1.0);

    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            // Sample hidden activations with the binary concrete
            // (Gumbel-sigmoid) relaxation.
            std::vector<double> hidden;
            hidden.reserve(hidden_size);
            for (int i = 0; i < hidden_size; ++i) {
                double u = uniform_distribution(generator);
                double g = -log(-log(u));  // Gumbel(0, 1) sample
                double s = (input[i] + g) / temperature;
                double p = 1.0 / (1.0 + exp(-s));
                hidden.push_back(p);
            }

            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (hidden[i] - input[i]) * hidden[j];
                }
            }

            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (hidden[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    weights_input_hidden[j][i] -= learning_rate * error *
                                                  input[j] * (1 - hidden[i]) *
                                                  hidden[i];
                }
            }
        }
    }
}
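// The sampling step is the binary concrete (Gumbel-sigmoid) relaxation:
//
//   g = -\log(-\log u), \quad u \sim \mathcal{U}(0, 1), \qquad
//   h_i = \sigma\big( (x_i + g) / \tau \big)
//
// As the temperature \tau \to 0 the activations approach hard 0/1 samples;
// larger \tau gives smoother values.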
// VariationalAutoEncoder: encodes each input to a mean and log-variance
// and samples the latent code with the reparameterization trick.
double sample_dist() {
    static std::default_random_engine generator;
    static std::normal_distribution<double> distribution(0.0, 1.0);
    return distribution(generator);
}

double reparameterize(double mean, double log_variance) {
    double standard_normal_sample = sample_dist();
    return mean + exp(0.5 * log_variance) * standard_normal_sample;
}
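// The reparameterization trick in equation form: with
// \varepsilon \sim \mathcal{N}(0, 1),
//
//   z = \mu + \sigma \varepsilon = \mu + e^{\frac{1}{2} \log \sigma^2} \varepsilon
//
// which moves the randomness into \varepsilon so gradients can flow
// through \mu and \log \sigma^2.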
std::vector<double> encoder(const std::vector<double> &input) {
    std::vector<double> hidden(hidden_size);

    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            hidden[i] += input[j] * weights_input_hidden[j][i];
        }
        hidden[i] = sigmoid({hidden[i]})[0];
    }
    return hidden;
}
std::vector<double> decoder(const std::vector<double> &hidden_sampled) {
    std::vector<double> output(output_size);

    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            output[i] += hidden_sampled[j] * weights_hidden_output[j][i];
        }
        output[i] = sigmoid({output[i]})[0];
    }
    return output;
}
void gradient_descent(const std::vector<double> &input,
                      const std::vector<double> &output,
                      const std::vector<double> &hidden_sampled) {
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < hidden_size; ++j) {
            weights_hidden_output[j][i] -=
                learning_rate * (output[i] - input[i]) * hidden_sampled[j];
        }
    }

    for (int i = 0; i < hidden_size; ++i) {
        for (int j = 0; j < input_size; ++j) {
            double error = 0.0;
            for (int k = 0; k < output_size; ++k) {
                error += (output[k] - input[k]) * weights_hidden_output[i][k];
            }
            double hidden_gradient =
                hidden_sampled[i] * (1 - hidden_sampled[i]);
            double log_variance_gradient =
                1.0 / (1.0 + exp(-hidden_log_variance[i]));
            // Combine the reconstruction, mean, and log-variance terms.
            weights_input_hidden[j][i] -=
                learning_rate *
                (error * hidden_gradient +
                 (hidden_sampled[i] - hidden_mean[i]) * hidden_gradient +
                 (hidden_log_variance[i] - log_variance_gradient) *
                     hidden_gradient) *
                input[j];  // trailing factor assumed from the parallel updates
        }
    }
}
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    std::default_random_engine generator;
    std::normal_distribution<double> normal_distribution(0.0, 1.0);
    std::vector<double> hidden_sampled;

    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            // One encoder pass each for the mean and log-variance heads
            // (this simplified model shares a single encoder).
            hidden_mean = encoder(input);
            hidden_log_variance = encoder(input);

            hidden_sampled.clear();  // start from an empty sample per input
            for (int i = 0; i < hidden_size; ++i) {
                hidden_sampled.push_back(
                    reparameterize(hidden_mean[i], hidden_log_variance[i]));
            }

            std::vector<double> output = decoder(hidden_sampled);
            gradient_descent(input, output, hidden_sampled);
        }
    }
}
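// For reference, a full VAE maximizes the evidence lower bound; the
// hand-rolled update above is a rough approximation of a step on it:
//
//   \mathcal{L} = \mathbb{E}_{q(z|x)}[\log p(x|z)]
//                 - \mathrm{KL}\big( q(z|x) \,\|\, p(z) \big)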
// RecurrentAutoEncoder: adds hidden-to-hidden recurrent weights for
// sequential data.
RecurrentAutoEncoder(int in_size, int h_size, int out_size, double l_rate)
    : AutoEncoder(in_size, h_size, out_size, l_rate),
      weights_recurrent(h_size, std::vector<double>(h_size, 0.0)) {}
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    for (int epoch = 0; epoch < epochs; ++epoch) {
        std::vector<double> previous_hidden(hidden_size, 0.0);

        for (const auto &input : training_data) {
            std::vector<double> hidden = recurr_fwd(input, previous_hidden);
            std::vector<double> output = forward(hidden);

            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (output[i] - input[i]) * hidden[j];
                }
            }

            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (output[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    weights_input_hidden[j][i] -= learning_rate * error *
                                                  input[j] * (1 - hidden[i]) *
                                                  hidden[i];
                }
            }

            // Update the recurrent weights toward the previous hidden state.
            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_recurrent[j][i] -=
                        learning_rate * (hidden[i] - previous_hidden[i]) *
                        previous_hidden[j];  // trailing factor assumed
                }
            }

            previous_hidden = hidden;
        }
    }
}
std::vector<double> recurr_fwd(const std::vector<double> &input,
                               const std::vector<double> &previous_hidden) {
    std::vector<double> recurrent_input(hidden_size, 0.0);

    for (int i = 0; i < hidden_size; ++i) {
        recurrent_input[i] = 0.0;
        for (int j = 0; j < input_size; ++j) {
            recurrent_input[i] += weights_input_hidden[j][i] * input[j];
        }
        for (int j = 0; j < hidden_size; ++j) {
            recurrent_input[i] += weights_recurrent[j][i] * previous_hidden[j];
        }
        recurrent_input[i] = sigmoid({recurrent_input[i]})[0];
    }

    return recurrent_input;
}
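// Per time step the recurrent encoder computes
//
//   h_t = \sigma\big( W_{ih}^{\top} x_t + W_{rec}^{\top} h_{t-1} \big)
//
// with h_0 = 0, matching previous_hidden's zero initialization in train().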
// FullAutoEncoder: trains the plain fully connected autoencoder.
virtual void train(const std::vector<std::vector<double>> &training_data,
                   int epochs) override {
    for (int epoch = 0; epoch < epochs; ++epoch) {
        for (const auto &input : training_data) {
            std::vector<double> hidden = forward(input);
            std::vector<double> output = forward(hidden);

            for (int i = 0; i < output_size; ++i) {
                for (int j = 0; j < hidden_size; ++j) {
                    weights_hidden_output[j][i] -=
                        learning_rate * (output[i] - input[i]) * hidden[j];
                }
            }

            for (int i = 0; i < hidden_size; ++i) {
                for (int j = 0; j < input_size; ++j) {
                    double error = 0.0;
                    for (int k = 0; k < output_size; ++k) {
                        error += (output[k] - input[k]) *
                                 weights_hidden_output[i][k];
                    }
                    weights_input_hidden[j][i] -= learning_rate * error *
                                                  input[j] * (1 - hidden[i]) *
                                                  hidden[i];
                }
            }
        }
    }
}
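// A minimal end-to-end driver for the classes above; the dataset, sizes,
// and epoch count are illustrative, not from the source.
int main() {
    // Toy dataset: three 4-dimensional points in [0, 1].
    std::vector<std::vector<double>> data = {
        {0.1, 0.9, 0.1, 0.9},
        {0.9, 0.1, 0.9, 0.1},
        {0.5, 0.5, 0.5, 0.5},
    };

    FullAutoEncoder model(4, 2, 4, 0.1);  // 4 -> 2 -> 4, learning rate 0.1
    model.train(data, 100);
    model.display();

    std::vector<double> reconstruction = model.forward(data[0]);
    return 0;
}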
Member reference (briefs from the generated documentation):

File: Auto Encoder Neural Network, efficient for unlabeled data.

AutoEncoder: A simple implementation of a vanilla autoencoder.
    AutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate): Constructor for the AutoEncoder class.
    std::vector<std::vector<double>> weights_input_hidden: Weights from the input layer to the hidden layer.
    std::vector<std::vector<double>> weights_hidden_output: Weights from the hidden layer to the output layer.
    int input_size: Size of the input layer.
    int hidden_size: Size of the hidden layer.
    int output_size: Size of the output layer.
    std::vector<double> sigmoid(const std::vector<double> &x): Sigmoid activation function.
    std::vector<double> forward(const std::vector<double> &input): Forward pass through the autoencoder.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs): Train the autoencoder on a dataset.
    void lrate_set(double initial_rate): Set the initial learning rate.
    virtual void lrate_update(int epoch): Update the learning rate based on a schedule.
    virtual void save(const std::string &filename) const: Save the model weights to a file.
    virtual void load(const std::string &filename): Load model weights from a file.
    void display(): Print the weights of the autoencoder.

SparseAutoEncoder:
    SparseAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate, double sparsity_weight, double sparsity_target): Constructor for the SparseAutoEncoder class.
    void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the sparse autoencoder on the given training data.

DenoisingAutoEncoder:
    DenoisingAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate, double corruption_level): Constructor for the DenoisingAutoEncoder class.
    void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Adds noise to the input data and trains the denoising autoencoder.

ContractiveAutoEncoder:
    ContractiveAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate, double contractive_weight): Constructor for the ContractiveAutoEncoder class.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the contractive autoencoder on the given training data.

MDLAutoEncoder:
    MDLAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate, double mdl_weight): Constructor for the MDLAutoEncoder class.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the MDL autoencoder on the given training data.

ConcreteAutoEncoder:
    ConcreteAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate, double temperature): Constructor for the ConcreteAutoEncoder class.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the Concrete autoencoder on the given training data.

VariationalAutoEncoder:
    VariationalAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate): Constructor for the VariationalAutoEncoder class.
    double sample_dist(): Samples from a standard normal distribution.
    double reparameterize(double mean, double log_variance): Reparameterization trick for variational autoencoders.
    std::vector<double> encoder(const std::vector<double> &input): Performs the forward pass for the encoder and returns hidden activations.
    std::vector<double> decoder(const std::vector<double> &hidden_sampled): Performs the forward pass for the decoder and returns output activations.
    void gradient_descent(const std::vector<double> &input, const std::vector<double> &output, const std::vector<double> &hidden_sampled): Performs the backward pass (gradient descent) updating weights.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the Variational autoencoder on the given training data.

RecurrentAutoEncoder:
    RecurrentAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate): Constructor for the RecurrentAutoEncoder class.
    std::vector<double> recurr_fwd(const std::vector<double> &input, const std::vector<double> &previous_hidden): Performs a forward pass through the recurrent layer.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the recurrent autoencoder on the given sequential data.

FullAutoEncoder:
    FullAutoEncoder(int input_size, int hidden_size, int output_size, double learning_rate): Constructor for the FullAutoEncoder class.
    virtual void train(const std::vector<std::vector<double>> &training_data, int epochs) override: Trains the fully connected autoencoder on the given training data.