44 #include "../core/datatable.hpp"
45 #include "../core/utils.hpp"
56 std::vector<long double>
x;
58 std::vector<long double>
y;
138 void get_input(
const std::vector<long double> &x_data,
139 const std::vector<long double> &y_data);
149 const std::vector<std::string> &columns);
168 void split_data(
double test_size,
unsigned int seed,
bool shuffle);
180 long double predict(
long double _x)
const;
188 long double predict(
long double _x,
const std::vector<long double> &x_data);
202 long double error_in(
long double num);
218 long double error_in(
long double num,
219 const std::vector<long double> &x_data,
220 const std::vector<long double> &y_data);
245 long double mse(
const std::vector<long double> &x_data,
246 const std::vector<long double> &y_data)
const;
273 long double r_sqrd(
const std::vector<long double> &x_data,
274 const std::vector<long double> &y_data)
const;
281 int64_t
num_rows(
const char *input);
void best_fit()
Calculates and displays the best-fitting line based on the training data.
LinearRegression()
Constructor for LinearRegression.
void show_data()
Display the data set.
long double mse(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculates the Mean Squared Error (MSE) for a dataset.
long double error_in(long double num)
Calculates the error (residual) for a given independent variable value.
long double predict(long double _x) const
Predict a value based on the input.
long double return_coeffecient()
Get the coefficient/slope of the best-fitting line.
std::vector< long double > y
int64_t data_size()
Get the number of entries (x_i, y_i) in the data set.
std::vector< long double > y_train
long double error_square()
Calculates the sum of squared errors for the entire dataset.
void split_data(double test_size, unsigned int seed, bool shuffle)
Splits the data into training and testing sets.
std::vector< long double > x_test
long double r_sqrd(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculate the coefficient of determination (R-squared).
std::vector< long double > y_test
std::vector< long double > x
void calculate_coeffecient()
Calculates the coefficient/slope of the best-fitting line.
long double return_constant()
Get the constant term of the best-fitting line.
int64_t num_rows(const char *input)
Calculate the number of rows in a file.
void get_input(const std::vector< long double > &x_data, const std::vector< long double > &y_data)
Sets the input data for the LinearRegression class from two vectors.
std::vector< long double > x_train
void calculate_constant()
Calculates the constant term of the best-fitting line.
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr
The C++ namespace of the openGPMP source code.