openGPMP
Open Source Mathematics Package
datatable_wip.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 
34 /*
35  * The datatable class similar to the Pandas Dataframe, R data.table, etc
36  * with many more limitations.
37  */
38 #ifndef DATATABLE_HPP
39 #define DATATABLE_HPP
40 
// Row-count limits for the table.
// NOTE(review): their use is not visible in this header. The TODO on
// DataTable::display() mentions showing the first 15 and last 15 rows by
// default, which presumably corresponds to SHOW_ROWS (and MAX_ROWS being
// twice that) — confirm against the implementation.
#define MAX_ROWS 30
#define SHOW_ROWS 15
43 
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <variant>
#include <vector>
48 
49 namespace gpmp {
50 
51 namespace core {
52 
/**
 * @brief enum for representing different data types
 *
 * This is the return type of DataTable::inferType(), which takes a column of
 * string values.
 *
 * NOTE(review): the enumerator names suggest fixed-width signed/unsigned
 * integer, floating-point and string categories; the exact rule used to pick
 * one is in the implementation file, not in this header — confirm there.
 */
enum class DataType {
    dt_uint8,
    dt_int8,
    dt_uint16,
    dt_int16,
    dt_uint32,
    dt_int32,
    dt_uint64,
    dt_int64,
    dt_double,
    dt_ldouble,
    dt_str,
};
68 
/**
 * @brief Pair of a string vector and a 2D vector of mixed-type cells.
 *
 * Each cell is a std::variant holding an int64_t, a long double or a
 * std::string.
 * NOTE(review): presumably `.first` holds the column headers and `.second`
 * the rows — confirm against csv_read_new()/native_type().
 */
typedef std::pair<
    std::vector<std::string>,
    std::vector<std::vector<std::variant<int64_t, long double, std::string>>>>
    TableType;

/**
 * @brief 2D vector of mixed-type cells (int64_t, long double or std::string).
 *
 * Same type as TableType::second_type.
 */
typedef std::vector<
    std::vector<std::variant<int64_t, long double, std::string>>>
    MixedType;

/** @brief Pair of a string vector and a 2D vector of strings. */
typedef std::pair<std::vector<std::string>,
                  std::vector<std::vector<std::string>>>
    DataTableStr;

/** @brief Pair of an int64_t vector and a 2D vector of int64_t. */
typedef std::pair<std::vector<int64_t>, std::vector<std::vector<int64_t>>>
    DataTableInt;

/** @brief Pair of a long double vector and a 2D vector of long double. */
typedef std::pair<std::vector<long double>,
                  std::vector<std::vector<long double>>>
    DataTableDouble;
92 
93 class DataTable {
94  private:
95  // original DataTable object headers
96  std::vector<std::string> headers_;
97  // original DataTable object rows
99  // modified DataTable object headers
100  std::vector<std::string> new_headers_;
101  // vector to hold data
103 
104  // original DataTable data
106 
107  public:
112  // Initialize data_ and headers_ to empty vectors
113  headers_ = std::vector<std::string>();
114  data_ = MixedType();
115  }
116  void printData() {
117  // Print column headers
118  for (const auto &header : headers_) {
119  std::cout << header << "\t";
120  }
121  std::cout << std::endl;
122 
123  // Print data rows
124  for (const auto &row : data_) {
125  for (const auto &cell : row) {
126  // Check the type of cell and print accordingly
127  if (std::holds_alternative<int64_t>(cell)) {
128  std::cout << std::get<int64_t>(cell);
129  } else if (std::holds_alternative<long double>(cell)) {
130  std::cout << std::get<long double>(cell);
131  } else if (std::holds_alternative<std::string>(cell)) {
132  std::cout << std::get<std::string>(cell);
133  }
134 
135  std::cout << "\t";
136  }
137  std::cout << std::endl;
138  }
139  }
140 
149  // DataTableStr csv_read(std::string filename,
150  // std::vector<std::string> columns = {});
151 
152  TableType csv_read_new(std::string filename,
153  std::vector<std::string> columns = {});
154 
155  DataTableStr csv_read(std::string filename,
156  std::vector<std::string> columns);
157 
161  void csv_write();
162 
163  // TODO: TOML and JSON readers?
172  DataTableStr tsv_read(std::string filename,
173  std::vector<std::string> columns = {});
174 
183  DataTableStr json_read(std::string filename,
184  std::vector<std::string> objs = {});
185 
189  void drop(std::vector<std::string> column_name);
190 
199  DataTableStr datetime(std::string column_name,
200  bool extract_year = true,
201  bool extract_month = true,
202  bool extract_time = false);
203 
210  void sort(const std::vector<std::string> &sort_columns,
211  bool ascending = true);
212 
213  // customSort(const std::vector<std::string>& columnNames,
214  // gpmp::core::DataTableStr& data);
215 
221  std::vector<DataTableStr>
222  group_by(std::vector<std::string> group_by_columns);
223 
230  first(const std::vector<gpmp::core::DataTableStr> &groups) const;
231 
235  void describe();
236 
240  void info();
241 
248  TableType native_type(const std::vector<std::string> &skip_columns = {});
249 
250  DataType inferType(const std::vector<std::string> &column);
251 
262 
271 
285  // TODO : edit this display method to read in the first 15 and last 15 by
286  // default. if display_all = true then fetch all rows
287  void display(const TableType &data, bool display_all = false);
288 
294  void display(bool display_all = false);
295 };
296 
297 } // namespace core
298 } // namespace gpmp
299 
300 #endif // DATATABLE_HPP
DataTableStr json_read(std::string filename, std::vector< std::string > objs={})
Reads a JSON file and returns a DataTableStr; parses JSON files and stores all data as strings.
std::vector< DataTableStr > group_by(std::vector< std::string > group_by_columns)
Groups the data by specified columns.
DataType inferType(const std::vector< std::string > &column)
Definition: datatable1.cpp:135
std::vector< std::vector< std::string > > data_
Definition: datatable.hpp:85
DataTableStr csv_read(std::string filename, std::vector< std::string > columns)
DataTableStr first(const std::vector< gpmp::core::DataTableStr > &groups) const
Gets the first element of each created group.
void describe()
Displays some information about the DataTable.
void csv_write()
Write DataTable to a CSV file.
void info()
Displays data types and null vals for each column.
Definition: datatable2.cpp:647
DataTableStr datetime(std::string column_name, bool extract_year=true, bool extract_month=true, bool extract_time=false)
Extracts date and time components from a timestamp column.
TableType native_type(const std::vector< std::string > &skip_columns={})
Converts DataTable column's rows to their native types. Since the existing DataTable read/load relate...
Definition: datatable1.cpp:176
void sort(const std::vector< std::string > &sort_columns, bool ascending=true)
Sorts the rows of the DataTable based on specified columns.
std::vector< std::string > headers_
Definition: datatable.hpp:79
DataTableStr original_data_
Definition: datatable.hpp:88
void drop(std::vector< std::string > column_name)
Drop specified columns from a DataTable.
void display(std::pair< std::vector< T >, std::vector< std::vector< T >>> data, bool display_all=false)
Displays the given DataTable data; fetches all rows when display_all is true.
Definition: datatable.hpp:216
DataTableInt str_to_int(DataTableStr src)
Converts a DataTableStr to a DataTableInt.
DataTableDouble str_to_double(DataTableStr src)
Converts a DataTableStr to a DataTableDouble.
std::vector< std::string > new_headers_
Definition: datatable.hpp:83
DataTableStr tsv_read(std::string filename, std::vector< std::string > columns={})
Reads a TSV file and returns a DataTableStr; parses TSV files and stores all data as strings.
void display(bool display_all=false)
Overload function for display() defaults to displaying what is currently stored in a DataTable object...
TableType csv_read_new(std::string filename, std::vector< std::string > columns={})
Reads a CSV file and returns a TableType, whose cells are each an int64_t, a long double or a std::string.
DataTable()
DataTable constructor. Initializes column & row storage.
std::pair< std::vector< long double >, std::vector< std::vector< long double > > > DataTableDouble
Definition: datatable.hpp:74
std::pair< std::vector< std::string >, std::vector< std::vector< std::string > > > DataTableStr
Definition: datatable.hpp:65
std::pair< std::vector< std::string >, std::vector< std::vector< std::variant< int64_t, long double, std::string > > > > TableType
DataType
enum for representing different data types
Definition: datatable.hpp:59
std::pair< std::vector< int64_t >, std::vector< std::vector< int64_t > > > DataTableInt
Definition: datatable.hpp:69
std::vector< std::vector< std::variant< int64_t, long double, std::string > > > MixedType
The source C++ openGPMP namespace.