openGPMP
Open Source Mathematics Package
linreg.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 
34 /*
35  * Linear regression is a statistical method for modeling
36  * the relationship between a dependent variable and a given
37  * independent variable. Multiple linear regression follows
38  * the same idea, but with a set of several independent
39  * variables.
40  */
41 
42 #ifndef LINREG_HPP
43 #define LINREG_HPP
44 #include "../core/datatable.hpp"
45 #include "../core/utils.hpp"
46 #include <cstdint>
47 #include <vector>
48 
49 namespace gpmp {
50 
51 namespace ml {
52 
54  public:
56  std::vector<long double> x;
58  std::vector<long double> y;
60  long double coeff;
62  long double constant;
64  long double sum_xy;
66  long double sum_x;
68  long double sum_y;
70  long double sum_x_square;
72  long double sum_y_square;
74  std::vector<long double> x_train;
76  std::vector<long double> y_train;
78  std::vector<long double> x_test;
80  std::vector<long double> y_test;
81 
86 
93  void calculate_coeffecient();
94 
98  void calculate_constant();
99 
104  int64_t data_size();
105 
110  long double return_coeffecient();
111 
116  long double return_constant();
117 
126  void best_fit();
127 
138  void get_input(const std::vector<long double> &x_data,
139  const std::vector<long double> &y_data);
140 
148  void get_input(const gpmp::core::DataTableStr &data,
149  const std::vector<std::string> &columns);
150 
156  void get_input(const char *file);
157 
168  void split_data(double test_size, unsigned int seed, bool shuffle);
169 
173  void show_data();
174 
180  long double predict(long double _x) const;
181 
188  long double predict(long double _x, const std::vector<long double> &x_data);
189 
202  long double error_in(long double num);
203 
218  long double error_in(long double num,
219  const std::vector<long double> &x_data,
220  const std::vector<long double> &y_data);
221 
231  long double error_square();
232 
245  long double mse(const std::vector<long double> &x_data,
246  const std::vector<long double> &y_data) const;
247 
273  long double r_sqrd(const std::vector<long double> &x_data,
274  const std::vector<long double> &y_data) const;
275 
281  int64_t num_rows(const char *input);
282 };
283 
284 } // namespace ml
285 
286 } // namespace gpmp
287 
288 #endif
void best_fit()
Calculates and displays the best fitting line based on training data.
Definition: linreg.cpp:110
LinearRegression()
Constructor for LinearRegression.
Definition: linreg.cpp:54
void show_data()
Display the data set.
Definition: linreg.cpp:337
long double mse(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculates the Mean Squared Error (MSE) for a dataset.
Definition: linreg.cpp:412
long double error_in(long double num)
Calculates the error (residual) for a given independent variable value.
Definition: linreg.cpp:373
long double predict(long double _x) const
Predict a value based on the input.
Definition: linreg.cpp:356
long double return_coeffecient()
Get the coefficient/slope of the best fitting line.
Definition: linreg.cpp:94
std::vector< long double > y
Definition: linreg.hpp:58
long double sum_x_square
Definition: linreg.hpp:70
int64_t data_size()
Get the number of entries (xi, yi) in the data set.
Definition: linreg.cpp:89
std::vector< long double > y_train
Definition: linreg.hpp:76
long double error_square()
Calculates the sum of squared errors for the entire dataset.
Definition: linreg.cpp:398
void split_data(double test_size, unsigned int seed, bool shuffle)
Splits the data into training and testing sets.
Definition: linreg.cpp:279
std::vector< long double > x_test
Definition: linreg.hpp:78
long double r_sqrd(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculate the coefficient of determination (R-squared).
Definition: linreg.cpp:438
std::vector< long double > y_test
Definition: linreg.hpp:80
std::vector< long double > x
Definition: linreg.hpp:56
long double sum_y_square
Definition: linreg.hpp:72
void calculate_coeffecient()
Calculates the coefficient/slope of the best fitting line.
Definition: linreg.cpp:66
long double return_constant()
Get the constant term of the best fitting line.
Definition: linreg.cpp:102
int64_t num_rows(const char *input)
Calculate the number of rows in a file.
Definition: linreg.cpp:474
void get_input(const std::vector< long double > &x_data, const std::vector< long double > &y_data)
Sets the input data for the LinearRegression class from two vectors.
Definition: linreg.cpp:157
std::vector< long double > x_train
Definition: linreg.hpp:74
void calculate_constant()
Calculate the constant term of the best fitting line.
Definition: linreg.cpp:80
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr
Definition: datatable.hpp:65
The source C++ openGPMP namespace.