63 typedef std::pair<std::vector<std::string>,
64 std::vector<std::vector<std::string>>>
68 typedef std::pair<std::vector<int64_t>, std::vector<std::vector<int64_t>>>
72 typedef std::pair<std::vector<long double>,
73 std::vector<std::vector<long double>>>
81 std::vector<std::vector<std::string>>
rows_;
85 std::vector<std::vector<std::string>>
data_;
93 data_ = std::vector<std::vector<std::string>>();
94 headers_ = std::vector<std::string>();
106 std::vector<std::string> columns = {});
123 std::vector<std::string> columns = {});
134 std::vector<std::string> objs = {});
145 bool extract_year =
true,
146 bool extract_month =
true,
147 bool extract_time =
false);
155 void sort(
const std::vector<std::string> &sort_columns,
156 bool ascending =
true);
166 std::vector<DataTableStr>
167 group_by(std::vector<std::string> group_by_columns);
175 first(
const std::vector<gpmp::core::DataTableStr> &groups)
const;
215 template <
typename T>
216 void display(std::pair<std::vector<T>, std::vector<std::vector<T>>> data,
217 bool display_all =
false) {
219 int num_columns = data.first.size();
220 int num_rows = data.second.size();
221 int num_omitted_rows = 0;
224 std::vector<int> max_column_widths(num_columns, 0);
227 for (
int i = 0; i < num_columns; i++) {
228 max_column_widths[i] = data.first[i].length();
232 for (
int i = 0; i < num_columns; i++) {
233 for (
const auto &row : data.second) {
234 if (i <
static_cast<int>(row.size())) {
235 max_column_widths[i] =
236 std::max(max_column_widths[i],
237 static_cast<int>(row[i].length()));
244 const int dateTimeColumnIndex = 0;
246 max_column_widths[dateTimeColumnIndex] =
247 std::max(max_column_widths[dateTimeColumnIndex], 0);
250 std::cout << std::setw(7) << std::right <<
"Index"
253 for (
int i = 0; i < num_columns; i++) {
254 std::cout << std::setw(max_column_widths[i]) << std::right
255 << data.first[i] <<
" ";
257 std::cout << std::endl;
259 int num_elements = data.second.size();
260 if (!display_all && num_elements >
MAX_ROWS) {
263 std::cout << std::setw(7) << std::right << i <<
" ";
265 for (
int j = 0; j < num_columns; j++) {
266 if (j <
static_cast<int>(data.second[i].size())) {
267 std::cout << std::setw(max_column_widths[j])
268 << std::right << data.second[i][j] <<
" ";
271 std::cout << std::endl;
273 num_omitted_rows = num_elements -
MAX_ROWS;
274 std::cout <<
"...\n";
275 std::cout <<
"[" << num_omitted_rows <<
" rows omitted]\n";
276 for (
int i = num_elements -
SHOW_ROWS; i < num_elements; i++) {
277 std::cout << std::setw(7) << std::right << i <<
" ";
279 for (
int j = 0; j < num_columns; j++) {
280 if (j <
static_cast<int>(data.second[i].size())) {
281 std::cout << std::setw(max_column_widths[j])
282 << std::right << data.second[i][j] <<
" ";
285 std::cout << std::endl;
289 for (
int i = 0; i < num_elements; i++) {
292 std::cout << std::setw(7) << std::right << i <<
" ";
293 for (
int j = 0; j < num_columns; j++) {
294 if (j <
static_cast<int>(data.second[i].size())) {
297 std::cout << std::setw(max_column_widths[j])
298 << std::right << data.second[i][j] <<
" ";
301 std::cout << std::endl;
306 std::cout <<
"[" << num_rows <<
" rows"
307 <<
" x " << num_columns <<
" columns";
308 std::cout <<
"]\n\n";
DataTableStr json_read(std::string filename, std::vector< std::string > objs={})
Reads a JSON file and returns a DataTableStr. Parses JSON files and stores all data as strings.
std::vector< std::vector< std::string > > data_
std::vector< DataTableStr > group_by(std::vector< std::string > group_by_columns)
Groups the data by specified columns.
DataTableStr csv_read(std::string filename, std::vector< std::string > columns={})
Reads a CSV file and returns a DataTableStr. Parses CSV files and stores all data as strings.
void describe()
Prints some information about the DataTable.
void csv_write()
Writes the DataTable to a CSV file.
DataTableStr first(const std::vector< gpmp::core::DataTableStr > &groups) const
Gets the first element of each created group.
void sort(const std::vector< std::string > &sort_columns, bool ascending=true)
Sorts the rows of the DataTable based on specified columns.
std::vector< std::string > headers_
DataTableStr original_data_
DataTableStr datetime(std::string column_name, bool extract_year=true, bool extract_month=true, bool extract_time=false)
Extracts date and time components from a timestamp column.
std::vector< std::vector< std::string > > rows_
void display(std::pair< std::vector< T >, std::vector< std::vector< T >>> data, bool display_all=false)
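Displays the given table data (headers and rows) on standard output. Unless display_all is true, tables with more than MAX_ROWS rows are printed in truncated form with an omitted-rows notice.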
DataTableInt str_to_int(DataTableStr src)
Converts a DataTableStr to a DataTableInt.
std::vector< std::string > new_headers_
DataTableStr tsv_read(std::string filename, std::vector< std::string > columns={})
Reads a TSV file and returns a DataTableStr. Parses TSV files and stores all data as strings.
void display(bool display_all=false)
Overload of display() that defaults to displaying what is currently stored in a DataTable object...
DataTableDouble str_to_double(DataTableStr src)
Converts a DataTableStr to a DataTableDouble.
std::pair< std::vector< long double >, std::vector< std::vector< long double > > > DataTableDouble
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr
DataType
enum for representing different data types
std::pair< std::vector< int64_t >, std::vector< std::vector< int64_t > > > DataTableInt
The source C++ openGPMP namespace.