openGPMP
Open Source Mathematics Package
datatable2.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file.
25  *
26  *
27  *
28  * This software is distributed on an AS IS basis, WITHOUT
29  * WARRANTY OF ANY KIND, either express or implied.
30  *
31  ************************************************************************/
32 #include <algorithm>
33 #include <cmath>
34 #include <cstdint>
35 #include <cstring>
36 #include <fcntl.h>
37 #include <iomanip>
38 #include <iostream>
39 #include <mutex>
41 #include <openGPMP/core/utils.hpp>
42 #include <regex>
43 #include <string>
44 #include <sys/mman.h>
45 #include <sys/stat.h>
46 #include <thread>
47 #include <typeinfo>
48 #include <unistd.h>
49 #include <unordered_set>
50 #include <variant>
51 #include <vector>
52 
55 
56 // create method to create datatable from scratch? insert, drop, etc?
57 
/**
 * @brief Checks whether a string is a plain base-10 integer literal.
 *
 * The whole string must consist of an optional leading '-' followed by one
 * or more digits. A leading '+', surrounding whitespace, a decimal point or
 * an empty string all make the check fail.
 *
 * @param str text to test
 * @return true if the entire string matches `-?\d+`, false otherwise
 */
bool is_int(const std::string &str) {
    // TODO : determine type of int based on length of largest val?
    // Built once and reused: compiling a std::regex on every call is costly
    // and this predicate is invoked once per table cell.
    static const std::regex int_pattern(R"(-?\d+)");
    return std::regex_match(str, int_pattern);
}
63 
/**
 * @brief Checks whether a string is a plain fixed-point decimal literal.
 *
 * The whole string must consist of an optional leading '-', one or more
 * digits, a '.', and one or more digits. Integers without a decimal point,
 * forms such as ".5" or "3.", exponents and a leading '+' are rejected.
 *
 * @param str text to test
 * @return true if the entire string matches `-?\d+\.\d+`, false otherwise
 */
bool is_double(const std::string &str) {
    // Built once and reused: compiling a std::regex on every call is costly
    // and this predicate is invoked once per table cell.
    static const std::regex double_pattern(R"(-?\d+\.\d+)");
    return std::regex_match(str, double_pattern);
}
68 
/**
 * @brief Reports a failed call and terminates the process.
 *
 * Writes msg, followed by the textual description of the current errno
 * value, to standard error via perror(), then exits with status 255.
 * This function does not return.
 *
 * @param msg label printed in front of the errno description
 */
void handle_error(const char *msg) {
    constexpr int failure_status = 255;

    perror(msg);
    exit(failure_status);
}
73 
74 const char *map_file(const char *fname, size_t &length) {
75  int fd = open(fname, O_RDONLY);
76  if (fd == -1)
77  handle_error("open");
78 
79  // Obtain file size
80  struct stat sb;
81  if (fstat(fd, &sb) == -1)
82  handle_error("fstat");
83 
84  length = sb.st_size;
85 
86  const char *addr = static_cast<const char *>(
87  mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0u));
88  if (addr == MAP_FAILED)
89  handle_error("mmap");
90 
91  // TODO: Close fd at some point in time, call munmap(...)
92  return addr;
93 }
94 
95 // TODO : optimize these methods, CSV reader using threads? loop unrolling?,
96 // etc? conversion functions to be quicker,
// csv_read: reads the CSV file `filename` and returns make_pair(columns, data),
// i.e. the selected column names paired with the parsed rows. The same two
// values are also stored in the headers_ and data_ members.
//
// - The first line is split on ',' to obtain the header names. An empty
//   `columns` argument selects every header; otherwise each requested name
//   must be present in the header.
// - Throws std::runtime_error (after logging at ERROR level) when the file
//   cannot be opened, when no first line can be read, or when a requested
//   column is not in the header.
// - Every later line is split on each ','; there is no handling of quoted
//   fields. A cell with exactly one '.' is tried as long double (std::stold);
//   a cell made only of digits and '-' with at most one '-' is tried as
//   int64_t (std::stoll); every other cell, and any cell whose conversion
//   throws std::invalid_argument, is stored as std::string.
//
// NOTE(review): this listing contains neither the return-type line of this
// function nor the declaration of `data` (listing lines 97 and 108 are
// absent) - presumably lost in extraction; confirm against the real file.
// NOTE(review): only std::invalid_argument is caught below; std::stold and
// std::stoll can also throw std::out_of_range, which would propagate.
// NOTE(review): std::stold/std::stoll accept a numeric prefix, so a cell
// such as "3.14kg" or "5-3" is stored as 3.14 / 5 rather than as text.
98 gpmp::core::DataTable::csv_read(std::string filename,
99  std::vector<std::string> columns) {
100  std::ifstream file(filename);
101  file.rdbuf()->pubsetbuf(nullptr, 0); // Disable buffering
102 
103  if (!file.is_open()) {
104  _log_.log(ERROR, "Unable to open file: " + filename + ".");
105  throw std::runtime_error("Unable to open file: " + filename + ".");
106  }
107 
109  std::string line;
110 
111  // Read the header line once and parse the column names
112  if (!getline(file, line)) {
113  _log_.log(ERROR, "Empty file: " + filename + ".");
114  throw std::runtime_error("Empty file: " + filename + ".");
115  }
116 
117  std::stringstream header(line);
118  std::vector<std::string> header_cols;
119  std::string column_name;
120 
121  while (getline(header, column_name, ',')) {
122  header_cols.emplace_back(column_name);
123  }
124 
125  // If no columns are specified, read in all columns
126  if (columns.empty()) {
127  columns = header_cols;
128  }
129 
130  // Check if specified columns exist in the header
131  for (const auto &column : columns) {
132  if (std::find(header_cols.begin(), header_cols.end(), column) ==
133  header_cols.end()) {
134  _log_.log(ERROR, "Column: " + column + " not found");
135  throw std::runtime_error("Column: " + column + " not found");
136  }
137  }
138 
139  // Reuse row_vector for each row to reduce memory overhead
140  std::vector<std::variant<int64_t, long double, std::string>> row_vector;
141 
142  while (getline(file, line)) {
143  std::stringstream rowStream(line);
144  std::string value;
145  int columnIndex = 0;
146 
147  while (getline(rowStream, value, ',')) {
// Keep the cell only if its header name is one of the requested columns.
// NOTE(review): header_cols[columnIndex] is not bounds-checked; a row with
// more fields than the header line indexes past the end of header_cols.
148  if (std::find(columns.begin(),
149  columns.end(),
150  header_cols[columnIndex]) != columns.end()) {
151  // Check if the value contains exactly 1 decimal point
152  size_t decimalPointCount =
153  std::count(value.begin(), value.end(), '.');
154  if (decimalPointCount == 1) {
155  try {
156  long double double_value = std::stold(value);
157  row_vector.emplace_back(double_value);
158  } catch (const std::invalid_argument &) {
159  row_vector.emplace_back(value);
160  }
// Integer candidate: only digits and '-' characters, with at most one '-'.
161  } else if ((value.find_first_not_of("0123456789-") ==
162  std::string::npos) &&
163  (std::count(value.begin(), value.end(), '-') <= 1)) {
164  try {
165  int64_t int_value = std::stoll(value);
166  row_vector.emplace_back(int_value);
167  } catch (const std::invalid_argument &) {
168  row_vector.emplace_back(value);
169  }
170  } else {
171  row_vector.emplace_back(value);
172  }
173  }
174  columnIndex++;
175  }
176 
// Rows that produced no cells (e.g. blank lines) are not stored.
177  if (!row_vector.empty()) {
178  data.emplace_back(row_vector);
179  row_vector.clear();
180  }
181  }
182 
183  file.close();
184 
185  // Populate headers_ class variable
186  headers_ = columns;
187  // Populate data_ class variable
188  data_ = data;
189 
190  return make_pair(columns, data);
191 }
192 
193 // The main DataTable display method
194 // TODO : edit this display method to read in the first 15 and last 15 by
195 // default. if display_all = true then fetch all rows
// display (table overload): prints `data` to std::cout as a right-aligned
// text table. data.first supplies the column headers and data.second the
// rows; each row is prefixed with its index in a 7-character column.
//
// - Column widths are the maximum of the header length and the length of
//   every cell in that column (std::to_string is used for numeric cells).
// - When display_all is false and there are more than MAX_ROWS rows, only
//   the first SHOW_ROWS and the last SHOW_ROWS rows are printed, separated
//   by "..." and an "[N rows omitted]" line. Otherwise every row is printed.
// - A "[rows x columns]" summary line is printed last.
//
// NOTE(review): the first line of this function's signature (listing line
// 196) is absent from this listing, so the exact type of `data` and whether
// it is taken by value or by reference cannot be confirmed here.
197  bool display_all) {
198  int num_columns = data.first.size();
199  int num_rows = data.second.size();
200  int num_omitted_rows = 0;
201 
202  std::vector<int> max_column_widths(num_columns, 0);
203 
204  // Calculate the maximum width for each column based on column headers
205  for (int i = 0; i < num_columns; i++) {
206  max_column_widths[i] = data.first[i].length();
207  }
208 
209  // Calculate the maximum width for each column based on data rows
210  for (int i = 0; i < num_columns; i++) {
211  for (const auto &row : data.second) {
212  if (i < static_cast<int>(row.size())) {
213  std::visit(
214  [&max_column_widths, &i](const auto &cellValue) {
215  using T = std::decay_t<decltype(cellValue)>;
216  if constexpr (std::is_same_v<T, std::string>) {
217  max_column_widths[i] =
218  std::max(max_column_widths[i],
219  static_cast<int>(cellValue.length()));
220  } else if constexpr (std::is_integral_v<T> ||
221  std::is_floating_point_v<T>) {
222  max_column_widths[i] = std::max(
223  max_column_widths[i],
224  static_cast<int>(
225  std::to_string(cellValue).length()));
226  }
227  },
228  row[i]);
229  }
230  }
231  }
232 
// NOTE(review): std::max(width, 0) leaves a non-negative width unchanged,
// and element 0 is accessed here even when num_columns is 0.
233  const int dateTimeColumnIndex = 0;
234  max_column_widths[dateTimeColumnIndex] =
235  std::max(max_column_widths[dateTimeColumnIndex], 0);
236 
237  // Define a function to print a row
238  auto printRow = [&data, &max_column_widths, num_columns](int row_index) {
239  std::cout << std::setw(7) << std::right << row_index << " ";
240 
241  for (int j = 0; j < num_columns; j++) {
// Rows shorter than the header simply print fewer cells.
242  if (j < static_cast<int>(data.second[row_index].size())) {
243  std::visit(
244  [&max_column_widths, &j](const auto &cellValue) {
245  using T = std::decay_t<decltype(cellValue)>;
246  if constexpr (std::is_same_v<T, double> ||
247  std::is_same_v<T, long double>) {
248  // Convert the value to a string without trailing
249  // zeros
250  std::string cellValueStr =
251  std::to_string(cellValue);
// First strip trailing '0' characters, then a trailing '.' if one is left.
252  cellValueStr.erase(
253  cellValueStr.find_last_not_of('0') + 1,
254  std::string::npos);
255  cellValueStr.erase(
256  cellValueStr.find_last_not_of('.') + 1,
257  std::string::npos);
258 
259  std::cout << std::setw(max_column_widths[j])
260  << std::right << cellValueStr << " ";
261  } else {
262  std::cout << std::setw(max_column_widths[j])
263  << std::right << cellValue << " ";
264  }
265  },
266  data.second[row_index][j]);
267  }
268  }
269 
270  std::cout << std::endl;
271  };
272 
273  // Print headers
274  std::cout << std::setw(7) << std::right << "Index"
275  << " ";
276  for (int i = 0; i < num_columns; i++) {
277  std::cout << std::setw(max_column_widths[i]) << std::right
278  << data.first[i] << " ";
279  }
280  std::cout << std::endl;
281 
282  int num_elements = data.second.size();
283  if (!display_all && num_elements > MAX_ROWS) {
284  for (int i = 0; i < SHOW_ROWS; i++) {
285  printRow(i);
286  }
// NOTE(review): 2 * SHOW_ROWS rows are printed but the omitted count
// subtracts MAX_ROWS; the figure is exact only if MAX_ROWS == 2 * SHOW_ROWS
// (both macros are defined outside this listing - confirm).
287  num_omitted_rows = num_elements - MAX_ROWS;
288  std::cout << "...\n";
289  std::cout << "[" << num_omitted_rows << " rows omitted]\n";
290  for (int i = num_elements - SHOW_ROWS; i < num_elements; i++) {
291  printRow(i);
292  }
293  } else {
294  // Print all rows
295  for (int i = 0; i < num_elements; i++) {
296  printRow(i);
297  }
298  }
299 
300  // Print the number of rows and columns
301  std::cout << "[" << num_rows << " rows"
302  << " x " << num_columns << " columns";
303  std::cout << "]\n\n";
304 }
305 
306 // Overload method for display(). Allows user to display the existing
307 // data in a DataTable object.
308 void gpmp::core::DataTable::display(bool display_all) {
309  display(std::make_pair(headers_, data_), display_all);
310 }
311 
// inferType: classifies a column of text cells as integer, double or string.
//
// Each cell is counted as an integer if is_int() accepts it, otherwise as a
// double if is_double() accepts it, otherwise as a string. The three counts
// are logged at INFO level. The result is DataType::dt_int32 when integers
// strictly outnumber doubles, DataType::dt_double when doubles strictly
// outnumber integers, and DataType::dt_str when the two counts are equal
// (which includes an empty column).
//
// NOTE(review): string_count is only logged; it never affects the result,
// so a column that is mostly text but contains one integer is reported as
// dt_int32.
// NOTE(review): the return-type line of this function (listing line 312) is
// absent from this listing.
313 gpmp::core::DataTable::inferType(const std::vector<std::string> &column) {
314  int integer_count = 0;
315  int double_count = 0;
316  int string_count = 0;
317 
318  for (const std::string &cell : column) {
319  if (is_int(cell)) {
320  integer_count++;
321  } else if (is_double(cell)) {
322  double_count++;
323  } else {
324  string_count++;
325  }
326  }
327 
328  _log_.log(INFO,
329  "int/double/str: " + std::to_string(integer_count) + "/" +
330  std::to_string(double_count) + "/" +
331  std::to_string(string_count));
332 
// Majority vote between the integer and double counts only.
333  if (integer_count > double_count) {
334  return DataType::dt_int32;
335  } else if (double_count > integer_count) {
336  return DataType::dt_double;
337  } else {
338  return DataType::dt_str;
339  }
340 }
// dt_to_str: returns a printable name for a gpmp::core::DataType value:
// one of "int64", "long double" or "std::string", or "Unknown" for any
// value without a matching case.
//
// NOTE(review): the three `case` labels (listing lines 343, 345 and 347)
// are absent from this listing, so which enumerator maps to which string
// cannot be confirmed here.
341 std::string dt_to_str(gpmp::core::DataType type) {
342  switch (type) {
344  return "int64";
346  return "long double";
348  return "std::string";
349  // TODO : Add more cases if needed
350  default:
351  return "Unknown";
352  }
353 }
354 
// native_type: builds and returns a TableType from headers_ and data_,
// leaving out every column whose name appears in skip_columns.
//
// For each kept column the cells of all rows are read as std::string,
// inferType() picks a type for the column, and the cells are converted with
// std::stoi (dt_int32), std::stod (dt_double) or copied unchanged
// (otherwise). Header names and progress markers are printed to std::cout
// and the column name / chosen type are logged at INFO level.
//
// NOTE(review): the first line of this function's signature (listing line
// 355) is absent from this listing.
// NOTE(review): std::get<std::string> throws std::bad_variant_access for any
// cell that holds int64_t or long double - csv_read in this file stores such
// cells.
// NOTE(review): each vector appended to mixed_data.second holds one COLUMN,
// whereas data_ holds one vector per row - confirm callers expect this.
// NOTE(review): std::stoi / std::stod throw on a cell that does not parse or
// is out of range; inferType does not guarantee every cell parses.
356  const std::vector<std::string> &skip_columns) {
357  gpmp::core::TableType mixed_data;
358  std::cout << "HEADERS:" << headers_.size() << std::endl;
359  std::cout << "ROWS:" << data_.size() << std::endl;
360 
361  // Traverse column headers, skipping specified columns
362  for (const std::string &header : headers_) {
363 
364  if (std::find(skip_columns.begin(), skip_columns.end(), header) !=
365  skip_columns.end()) {
366  continue; // Skip this column
367  }
368 
369  std::cout << header << " ";
370  // push column headers into mixed_data var
371  mixed_data.first.emplace_back(header);
372  }
373 
374  std::cout << std::endl;
375 
376  // Traverse rows, skip rows of the specified columns
377  for (size_t col = 0; col < headers_.size(); ++col) {
378  if (std::find(skip_columns.begin(),
379  skip_columns.end(),
380  headers_[col]) != skip_columns.end()) {
381  continue; // Skip this column
382  }
383  _log_.log(INFO, "Column: " + headers_[col]);
384 
385  // Collect data for this column
386  std::vector<std::string> column_data;
387  for (const std::vector<std::variant<int64_t, long double, std::string>>
388  &row : data_) {
// row[col] is not bounds-checked; a row shorter than headers_ is out of range.
389  column_data.emplace_back(
390  std::get<std::string>(row[col])); // Convert variant to string
391  }
392 
393  std::vector<std::variant<int64_t, long double, std::string>>
394  converted_data;
395 
396  // Call inferType on the column's data
397  gpmp::core::DataType column_type = inferType(column_data);
398 
399  _log_.log(INFO, "Using type: " + dt_to_str(column_type));
400 
401  // Check type and convert rows
402  if (column_type == gpmp::core::DataType::dt_int32) {
403  std::cout << "INT\n";
404  for (const std::string &cell : column_data) {
405  converted_data.emplace_back(std::stoi(cell));
406  }
407  } else if (column_type == gpmp::core::DataType::dt_double) {
408  std::cout << "DOUBLE\n";
409  for (const std::string &cell : column_data) {
410  converted_data.emplace_back(std::stod(cell));
411  }
412  } else {
413  std::cout << "STRING\n";
414  for (const std::string &cell : column_data) {
415  converted_data.emplace_back(cell);
416  }
417  }
418  // push rows into the mixed_data var
419  mixed_data.second.emplace_back(converted_data);
420  }
421 
422  /*std::cout << "Mixed Data:" << std::endl;
423  for (const std::string &header : mixed_data.first) {
424  std::cout << header << " ";
425  }
426  std::cout << std::endl;
427 
428  for (const auto &row : mixed_data.second) {
429  for (const auto &cell : row) {
430  if (std::holds_alternative<int64_t>(cell)) {
431  std::cout << std::get<int64_t>(cell) << " ";
432  } else if (std::holds_alternative<long double>(cell)) {
433  std::cout << std::get<long double>(cell) << " ";
434  } else if (std::holds_alternative<std::string>(cell)) {
435  std::cout << std::get<std::string>(cell) << " ";
436  }
437  }
438  std::cout << std::endl;
439  }*/
440 
441  return mixed_data;
442 }
443 
444 // Extracts date/time information from given column
445 // TODO: add additional options for detecting/converting date/time columns
446 // to numeric formats
447 /*
448 gpmp::core::DataTableStr
449 gpmp::core::DataTable::datetime(std::string column_name,
450  bool extract_year,
451  bool extract_month,
452  bool extract_time) {
453  // Find the index of the specified column
454  auto column_iter = std::find(headers_.begin(), headers_.end(), column_name);
455  if (column_iter == headers_.end()) {
456  _log_.log(ERROR, "Column: " + column_name + " node found");
457  exit(EXIT_FAILURE);
458  }
459  int column_index = std::distance(headers_.begin(), column_iter);
460 
461  // Extract components from each row
462  std::vector<std::string> new_headers = headers_;
463  std::vector<std::vector<std::string>> new_data;
464 
465  // Iterate and populate the additional columns
466  for (size_t row_index = 0; row_index < data_.size(); ++row_index) {
467  std::vector<std::string> row = data_[row_index];
468  // If column row is not found
469  if (row.size() <= static_cast<size_t>(column_index)) {
470  _log_.log(ERROR, "Column: " + column_name + " not found");
471 
472  exit(EXIT_FAILURE);
473  }
474 
475  std::string timestamp = row[column_index];
476  std::string year, month, time;
477 
478  // Create a new row with extracted components
479  std::vector<std::string> new_row;
480 
481  // Extract year, month, and time components
482  if (extract_year) {
483  year = timestamp.substr(timestamp.find_last_of('/') + 1, 4);
484  new_row.emplace_back(year);
485  }
486  if (extract_month) {
487  month = timestamp.substr(0, timestamp.find_first_of('/'));
488  new_row.emplace_back(month);
489  }
490  if (extract_time) {
491  time = timestamp.substr(timestamp.find(' ') + 1);
492  new_row.emplace_back(time);
493  }
494 
495  // append original row data
496  new_row.insert(new_row.end(), row.begin(), row.end());
497  // add new rows
498  new_data.emplace_back(new_row);
499  }
500 
501  // Create new headers based on the extracted components
502  if (extract_month)
503  new_headers.insert(new_headers.begin(), "Month");
504  if (extract_year)
505  new_headers.insert(new_headers.begin(), "Year");
506  if (extract_time)
507  new_headers.insert(new_headers.begin(), "Time");
508 
509  // set class var data_ to hold rows/lines
510  data_ = new_data;
511  // set class var modified headers to new headers
512  // new_headers_ = new_headers;
513  headers_ = new_headers;
514 
515  return std::make_pair(new_headers, new_data);
516 }
517 
518 // Sort specified columns, by default in ascending order
519 void gpmp::core::DataTable::sort(const std::vector<std::string> &sort_columns,
520  bool ascending) {
521  // Extract the column indices to be sorted by from the original data
522  std::vector<size_t> column_indices;
523  for (const std::string &column : sort_columns) {
524  auto iter = std::find(headers_.begin(), headers_.end(), column);
525  if (iter != headers_.end()) {
526  size_t index = std::distance(headers_.begin(), iter);
527  column_indices.emplace_back(index);
528  }
529  }
530 
531  // Sort the data based on the specified columns
532  std::stable_sort(data_.begin(),
533  data_.end(),
534  [&](const std::vector<std::string> &row1,
535  const std::vector<std::string> &row2) {
536  for (size_t index : column_indices) {
537  if (row1[index] != row2[index]) {
538  if (ascending) {
539  return row1[index] < row2[index];
540  } else {
541  return row1[index] > row2[index];
542  }
543  }
544  }
545  // Rows are equal, nothing to sort
546  return false;
547  });
548 }
549 
550 // Group rows by specific columns
551 std::vector<gpmp::core::DataTableStr>
552 gpmp::core::DataTable::group_by(std::vector<std::string> group_by_columns) {
553  // Find the indices of the specified group by columns
554  std::vector<int> group_by_indices;
555 
556  // Traverse group column names
557  for (const std::string &column_name : group_by_columns) {
558  std::cout << "Searching for column: " << column_name << std::endl;
559 
560  // Find start/end and match column name
561  auto column_iter =
562  std::find(headers_.begin(), headers_.end(), column_name);
563 
564  // If no columns
565  if (column_iter == headers_.end()) {
566  _log_.log(ERROR, "Column: " + column_name + " not found");
567  exit(EXIT_FAILURE);
568  }
569  // column index set to distance from start of first col to nexter iter
570  int column_index = std::distance(headers_.begin(), column_iter);
571  // add column index to group
572  group_by_indices.emplace_back(column_index);
573  }
574 
575  // Group the data based on the specified columns using a vector
576  std::vector<std::pair<std::vector<std::string>, gpmp::core::DataTableStr>>
577  groups;
578 
579  // Traverse row/line data
580  for (const std::vector<std::string> &row : data_) {
581  // store group key for each row
582  std::vector<std::string> group_key;
583  // Fill group key from specified group column names
584  for (int index : group_by_indices) {
585  group_key.emplace_back(row[index]);
586  }
587 
588  // Check if the group already exists
589  auto group_iter = std::find_if(
590  groups.begin(),
591  groups.end(),
592  [&group_key](const std::pair<std::vector<std::string>,
593  gpmp::core::DataTableStr> &group) {
594  return group.first == group_key;
595  });
596  // If the group DNE create a new one to add to groups vector
597  if (group_iter == groups.end()) {
598  // Create a new group
599  groups.emplace_back(
600  {group_key, gpmp::core::DataTableStr(headers_, {})});
601  group_iter = groups.end() - 1;
602  }
603  // Add current row to group
604  group_iter->second.second.emplace_back(row);
605  }
606 
607  // Extract the grouped data into a vector
608  std::vector<gpmp::core::DataTableStr> grouped_data;
609  // Iterate over sorted groups to push onto result vector
610  for (const auto &group : groups) {
611  grouped_data.emplace_back(group.second);
612  }
613 
614  // Return final DataTableStr type
615  return grouped_data;
616 }
617 
618 // Get first element of each created group
619 gpmp::core::DataTableStr gpmp::core::DataTable::first(
620  const std::vector<gpmp::core::DataTableStr> &groups) const {
621  if (groups.empty()) {
622  // Handle the case when there are no groups
623  return std::make_pair(std::vector<std::string>(),
624  std::vector<std::vector<std::string>>());
625  }
626 
627  std::vector<std::vector<std::string>> first_rows;
628 
629  for (const gpmp::core::DataTableStr &group : groups) {
630  if (!group.second.empty()) {
631  first_rows.emplace_back(
632  group.second[0]); // Get the first row of each group
633  }
634  }
635 
636  if (!first_rows.empty()) {
637  // Assuming all groups have the same headers as the first group
638  return std::make_pair(groups[0].first, first_rows);
639  } else {
640  // Handle the case when there are no first rows found.
641  return std::make_pair(groups[0].first,
642  std::vector<std::vector<std::string>>());
643  }
644 }
645 */
646 
// info: prints, for every column in headers_, its name, a data-type label
// and an estimated memory usage in KB, followed by a total for the table.
//
// The estimate starts at sizeof(headers_) and adds, per cell,
// sizeof(int64_t), sizeof(long double) or the std::string capacity,
// depending on which alternative the cell holds. Output goes to std::cout
// with std::fixed and precision 2.
//
// NOTE(review): the signature line of this function (listing line 647) is
// absent from this listing; the index appended to the listing gives it as
// `void info()`.
// NOTE(review): column_data_types[i] is overwritten for every row, so the
// printed type is that of the last row's cell in that column.
// NOTE(review): the per-column vectors are sized from headers_ but indexed
// by cell position; a row longer than headers_ would index out of range.
648  // Calculate memory usage for each column and keep track of data type
649  std::vector<double> column_memory_usages(headers_.size(), 0.0);
650  std::vector<std::string> column_data_types(headers_.size());
651  double total_memory_usage_kb = 0.0;
652 
653  // Calculate memory usage in bytes for the entire table
654  size_t memory_usage_bytes = sizeof(headers_);
655  for (const auto &row : data_) {
656  for (size_t i = 0; i < row.size(); ++i) {
657  if (std::holds_alternative<int64_t>(row[i])) {
658  memory_usage_bytes += sizeof(int64_t);
659  column_memory_usages[i] +=
660  static_cast<double>(sizeof(int64_t)) / 1024.0;
661  column_data_types[i] = "int64_t";
662  } else if (std::holds_alternative<long double>(row[i])) {
663  memory_usage_bytes += sizeof(long double);
664  column_memory_usages[i] +=
665  static_cast<double>(sizeof(long double)) / 1024.0;
666  column_data_types[i] = "long double";
667  } else if (std::holds_alternative<std::string>(row[i])) {
668  memory_usage_bytes += std::get<std::string>(row[i]).capacity();
669  column_memory_usages[i] +=
670  static_cast<double>(
671  std::get<std::string>(row[i]).capacity()) /
672  1024.0;
673  column_data_types[i] = "std::string";
674  }
675  }
676  }
677 
678  // Convert total memory usage to KB
679  total_memory_usage_kb = static_cast<double>(memory_usage_bytes) / 1024.0;
680 
681  // Find the maximum column name length
682  size_t max_column_name_length = 0;
683  for (const std::string &column : headers_) {
684  max_column_name_length =
685  std::max(max_column_name_length, column.length());
686  }
687 
688  // Find the maximum data type length
689  size_t max_data_type_length = 0;
690  for (const std::string &data_type : column_data_types) {
691  max_data_type_length =
692  std::max(max_data_type_length, data_type.length());
693  }
694 
695  // Set the column width for formatting
// One shared width is used for all three printed columns.
696  int column_width = static_cast<int>(std::max(max_column_name_length,
697  max_data_type_length)) +
698  2; // Add extra padding
699 
700  // Print header
701  std::cout << std::left << std::setw(column_width) << "Column"
702  << std::setw(column_width) << "Type" << std::setw(column_width)
703  << "Memory Usage (KB)" << std::endl;
704 
705  // Print data
706  for (size_t i = 0; i < headers_.size(); ++i) {
707  std::cout << std::left << std::setw(column_width) << headers_[i]
708  << std::setw(column_width) << column_data_types[i]
709  << std::setw(column_width) << std::fixed
710  << std::setprecision(2) << column_memory_usages[i]
711  << std::endl;
712  }
713 
714  // Print total table memory usage
715  std::cout << "\nTotal Memory Usage: " << std::fixed << std::setprecision(2)
716  << total_memory_usage_kb << " KB" << std::endl;
717 }
718 
719 // Prints some basic information about a DataTable object
723 
// str_to_int (body): copies the all-digit entries of `src` into `dest`,
// converting each with std::stoi. src.first is filtered into dest.first;
// every row of src.second produces one row in dest.second containing only
// its all-digit cells.
//
// Entries that do not match the regex \d+ (negative numbers, decimals,
// text, empty strings) are dropped, so a converted row can be shorter than
// its source row.
//
// NOTE(review): the signature and the declaration of `dest` (listing lines
// 720-722) are absent from this listing; the index appended to the listing
// gives the signature as `DataTableInt str_to_int(DataTableStr src)`.
// NOTE(review): std::stoi throws std::out_of_range for values that do not
// fit in int, although the destination elements are int64_t.
724  for (const auto &v : src.first) {
725  // check if v contains only digits
726  if (std::regex_match(v, std::regex("\\d+"))) {
727  dest.first.emplace_back(std::stoi(v));
728  }
729  }
730  for (const auto &vv : src.second) {
731  std::vector<int64_t> new_vec;
732  for (const auto &v : vv) {
733  // check if v contains only digits
734  if (std::regex_match(v, std::regex("\\d+"))) {
735  new_vec.emplace_back(std::stoi(v));
736  }
737  }
// A row is appended even when none of its cells matched (empty vector).
738  dest.second.emplace_back(new_vec);
739  }
740  return dest;
741 }
742 
746 
// str_to_double (body): copies the numeric entries of `src` into `dest`,
// converting each with std::stold. src.first is filtered into dest.first;
// every row of src.second produces one row in dest.second containing only
// its numeric cells.
//
// An entry is kept when it matches [-+]?\d*\.?\d+ (optional sign, optional
// integer part, optional '.', at least one trailing digit); all other
// entries are dropped, so a converted row can be shorter than its source
// row.
//
// NOTE(review): the signature and the declaration of `dest` (listing lines
// 743-745) are absent from this listing; the index appended to the listing
// gives the signature as `DataTableDouble str_to_double(DataTableStr src)`.
747  for (const auto &v : src.first) {
748  if (std::regex_match(v, std::regex("[-+]?\\d*\\.?\\d+"))) {
749  dest.first.emplace_back(std::stold(v));
750  }
751  }
752 
753  for (const auto &vv : src.second) {
754  std::vector<long double> new_vec;
755  for (const auto &v : vv) {
756  if (std::regex_match(v, std::regex("[-+]?\\d*\\.?\\d+"))) {
757  new_vec.emplace_back(std::stold(v));
758  }
759  }
// A row is appended even when none of its cells matched (empty vector).
760  dest.second.emplace_back(new_vec);
761  }
762 
763  return dest;
764 }
void display()
Definition: Mandelbrot.c:82
DataType inferType(const std::vector< std::string > &column)
Definition: datatable1.cpp:135
std::vector< std::vector< std::string > > data_
Definition: datatable.hpp:85
DataTableStr csv_read(std::string filename, std::vector< std::string > columns={})
Reads a CSV file and returns a DataTableStr parses CSV files and stores all data as strings.
Definition: datatable.cpp:57
void info()
Displays data types and null vals for each column.
Definition: datatable2.cpp:647
TableType native_type(const std::vector< std::string > &skip_columns={})
Converts DataTable column's rows to their native types. Since the existing DataTable read/load relate...
Definition: datatable1.cpp:176
std::vector< std::string > headers_
Definition: datatable.hpp:79
void display(std::pair< std::vector< T >, std::vector< std::vector< T >>> data, bool display_all=false)
Sort a DataTable based on a specified column.
Definition: datatable.hpp:216
DataTableInt str_to_int(DataTableStr src)
Converts a DataTableStr to a DataTableInt.
Definition: datatable.cpp:347
DataTableDouble str_to_double(DataTableStr src)
Converts a DataTableStr to a DataTableDouble.
Definition: datatable.cpp:370
void log(LogLevel level, const std::string &message)
Logs a message with the specified log level.
Definition: utils.cpp:77
std::string dt_to_str(gpmp::core::DataType type)
Definition: datatable2.cpp:341
const char * map_file(const char *fname, size_t &length)
Definition: datatable2.cpp:74
void handle_error(const char *msg)
Definition: datatable2.cpp:69
bool is_double(const std::string &str)
Definition: datatable2.cpp:65
static gpmp::core::Logger _log_
Definition: datatable2.cpp:54
bool is_int(const std::string &str)
Definition: datatable2.cpp:59
#define MAX_ROWS
Definition: datatable.hpp:41
#define SHOW_ROWS
Definition: datatable.hpp:42
std::pair< std::vector< long double >, std::vector< std::vector< long double > > > DataTableDouble
Definition: datatable.hpp:74
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr
Definition: datatable.hpp:65
std::pair< std::vector< std::string >, std::vector< std::vector< std::variant< int64_t, long double, std::string > > > > TableType
DataType
enum for representing different data types
Definition: datatable.hpp:59
std::pair< std::vector< int64_t >, std::vector< std::vector< int64_t > > > DataTableInt
Definition: datatable.hpp:69
std::vector< std::vector< std::variant< int64_t, long double, std::string > > > MixedType
static GLfloat u
Miscellaneous utilities methods related to openGPMP.
@ ERROR
Definition: utils.hpp:48
@ INFO
Definition: utils.hpp:48