Simple test driver showing how to use simple linear regression on a 2-column dataset. The current linear regression method is not capable of multiple linear regression; to make use of this implementation, data sets must either be two-column in nature, or users must dissect their data into a proper key/value form.
#include <iostream>
#include <stdio.h>
#include <openGPMP/gpmp.h>
dt.
csv_read(
"../../data/school_scores.csv", {
"Year",
"GPA"});
std::vector<std::string> columns = {"Year", "GPA"};
int v1 = 2007;
"Predicted value at " + std::to_string(v1) + " = " +
std::to_string(v1_v));
"Error value at " + std::to_string(v1) + " = " +
std::to_string(v1_e));
printf("Predicted value at %d = %f\n", v1, v1_v_after_split);
printf("Error value at %d = %f\n\n", v1, v1_e_after_split);
"Predicted value at " + std::to_string(v1) + " = " +
std::to_string(v1_v_after_split));
"Error value at " + std::to_string(v1) + " = " +
std::to_string(v1_e_after_split));
_log_.
log(
INFO,
"MSE = " + std::to_string(mse_after_split));
_log_.
log(
INFO,
"R2 score = " + std::to_string(r_squared_after_split));
}
return 0;
}
DataTableStr csv_read(std::string filename, std::vector< std::string > columns={})
Reads a CSV file and returns a DataTableStr. Parses CSV files and stores all data as strings.
void log(LogLevel level, const std::string &message)
Logs a message with the specified log level.
void best_fit()
Calculates and displays the best fitting line based on training data.
long double mse(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculates the Mean Squared Error (MSE) for a dataset.
long double error_in(long double num)
Calculates the error (residual) for a given independent variable value.
long double predict(long double _x) const
Predict a value based on the input.
std::vector< long double > y
void split_data(double test_size, unsigned int seed, bool shuffle)
Splits the data into training and testing sets.
std::vector< long double > x_test
long double r_sqrd(const std::vector< long double > &x_data, const std::vector< long double > &y_data) const
Calculate the coefficient of determination (R-squared).
std::vector< long double > y_test
std::vector< long double > x
void get_input(const std::vector< long double > &x_data, const std::vector< long double > &y_data)
Sets the input data for the LinearRegression class from two vectors.
static gpmp::core::Logger _log_
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr