Line data Source code
1 : /*************************************************************************
2 : * Testing Mtx class operations
3 : ************************************************************************/
4 : #include "t_matrix.hpp"
5 : #include <chrono>
6 : #include <cmath>
7 : #include <cstdint>
8 : #include <gtest/gtest.h>
9 : #include <iostream>
10 : #include <limits.h>
11 : #include <openGPMP/linalg/mtx.hpp>
12 : #include <openGPMP/linalg/mtx_tmpl.hpp>
13 : #include <string>
14 : #include <vector>
15 :
16 : using namespace gpmp;
// Stream expression that writes a green (ANSI 32) "[ INFO ]" tag to std::cerr
// and then resets the colour; append the message with operator<<.
17 : #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m"
// Same tag as TEST_COUT, followed by the ANSI sequences for bold blue (1;34)
// and bold (1), so the message streamed after it is highlighted.
// NOTE(review): the macro emits no reset after switching to bold blue.
18 : #define INFO_COUT \
19 : std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m"
20 : namespace {
21 :
22 4 : TEST(MatrixArrayTestI32, AdditionComparisonSmall) {
23 1 : INFO_COUT << "MATRIX (as Arrays) INT32" << std::endl;
24 :
25 1 : int mtx_size = 184;
26 : // define input matrices A and B
27 1 : int *A = new int[mtx_size * mtx_size];
28 1 : int *B = new int[mtx_size * mtx_size];
29 1 : int *expected = new int[mtx_size * mtx_size];
30 1 : int *result = new int[mtx_size * mtx_size];
31 :
32 : // initialize random number generator
33 1 : std::random_device rd;
34 1 : std::mt19937 gen(rd());
35 1 : std::uniform_int_distribution<int> distribution(1, 100);
36 :
37 : // populate matrices A and B with random values
38 185 : for (int i = 0; i < mtx_size; ++i) {
39 34040 : for (int j = 0; j < mtx_size; ++j) {
40 33856 : A[i * mtx_size + j] = distribution(gen);
41 33856 : B[i * mtx_size + j] = distribution(gen);
42 : }
43 : }
44 :
45 : gpmp::linalg::Mtx mtx;
46 : // expected result using the naive implementation
47 1 : mtx.std_mtx_add(A, B, expected, mtx_size, mtx_size);
48 :
49 : // result using the intrinsics implementation
50 1 : mtx.mtx_add(A, B, result, mtx_size, mtx_size);
51 :
52 : /*
53 : std::cout << "Matrix A:" << std::endl;
54 : print_matrix(A, mtx_size, mtx_size);
55 :
56 : std::cout << "Matrix B:" << std::endl;
57 : print_matrix(B, mtx_size, mtx_size);
58 :
59 : std::cout << "Matrix EXPECTED after addition:" << std::endl;
60 : print_matrix(expected, mtx_size, mtx_size);
61 :
62 : std::cout << "Matrix RESULT after addition:" << std::endl;
63 : print_matrix(result, mtx_size, mtx_size);
64 : */
65 : // compare the results
66 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
67 1 : delete[] A;
68 1 : delete[] B;
69 1 : delete[] expected;
70 1 : delete[] result;
71 1 : }
72 :
73 4 : TEST(MatrixArrayTestI32, AdditionComparisonLarge) {
74 1 : int mtx_size = 1024;
75 : // define input matrices A and B
76 1 : int *A = new int[mtx_size * mtx_size];
77 1 : int *B = new int[mtx_size * mtx_size];
78 1 : int *expected = new int[mtx_size * mtx_size];
79 1 : int *result = new int[mtx_size * mtx_size];
80 :
81 : // initialize random number generator
82 1 : std::random_device rd;
83 1 : std::mt19937 gen(rd());
84 1 : std::uniform_int_distribution<int> distribution(1, 100);
85 :
86 : // populate matrices A and B with random values
87 1025 : for (int i = 0; i < mtx_size; ++i) {
88 1049600 : for (int j = 0; j < mtx_size; ++j) {
89 1048576 : A[i * mtx_size + j] = distribution(gen);
90 1048576 : B[i * mtx_size + j] = distribution(gen);
91 : }
92 : }
93 :
94 : gpmp::linalg::Mtx mtx;
95 : // expected result using the naive implementation
96 1 : mtx.std_mtx_add(A, B, expected, mtx_size, mtx_size);
97 :
98 : // result using the intrinsics implementation
99 1 : mtx.mtx_add(A, B, result, mtx_size, mtx_size);
100 :
101 : /*
102 : std::cout << "Matrix EXPECTED after addition:" << std::endl;
103 : for (int i = 0; i < mtx_size; ++i) {
104 : for (int j = 0; j < mtx_size; ++j) {
105 : std::cout << expected[i][j] << " ";
106 : }
107 : std::cout << std::endl;
108 : }
109 :
110 : std::cout << "Matrix RESULT after addition:" << std::endl;
111 : for (int i = 0; i < mtx_size; ++i) {
112 : for (int j = 0; j < mtx_size; ++j) {
113 : std::cout << result[i][j] << " ";
114 : }
115 : std::cout << std::endl;
116 : }
117 : */
118 :
119 : // compare the results
120 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
121 1 : delete[] A;
122 1 : delete[] B;
123 1 : delete[] expected;
124 1 : delete[] result;
125 1 : }
126 :
127 4 : TEST(MatrixArrayTestI32, AdditionPerformanceComparison) {
128 1 : int mtx_size = 1024;
129 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl;
130 : // define input matrices A and B
131 1 : int *A = new int[mtx_size * mtx_size];
132 1 : int *B = new int[mtx_size * mtx_size];
133 1 : int *expected = new int[mtx_size * mtx_size];
134 1 : int *result = new int[mtx_size * mtx_size];
135 :
136 : // initialize random number generator
137 1 : std::random_device rd;
138 1 : std::mt19937 gen(rd());
139 1 : std::uniform_int_distribution<int> distribution(1, 100);
140 :
141 : // populate matrices A and B with random values
142 1025 : for (int i = 0; i < mtx_size; ++i) {
143 1049600 : for (int j = 0; j < mtx_size; ++j) {
144 1048576 : A[i * mtx_size + j] = distribution(gen);
145 1048576 : B[i * mtx_size + j] = distribution(gen);
146 : }
147 : }
148 :
149 : gpmp::linalg::Mtx mtx;
150 1 : auto start_std = std::chrono::high_resolution_clock::now();
151 :
152 : // expected result using the naive implementation
153 1 : mtx.std_mtx_add(A, B, expected, mtx_size, mtx_size);
154 :
155 1 : auto end_std = std::chrono::high_resolution_clock::now();
156 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
157 :
158 1 : auto start_intrin = std::chrono::high_resolution_clock::now();
159 :
160 : // result using the intrinsics implementation
161 1 : mtx.mtx_add(A, B, result, mtx_size, mtx_size);
162 1 : auto end_intrin = std::chrono::high_resolution_clock::now();
163 : std::chrono::duration<double> elapsed_seconds_intrin =
164 1 : end_intrin - start_intrin;
165 :
166 1 : TEST_COUT << "INTRINSIC Matrix Addition Time : "
167 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl;
168 1 : TEST_COUT << "STANDARD Matrix Addition Time : "
169 1 : << elapsed_seconds_std.count() << " seconds" << std::endl;
170 :
171 : // compare the results
172 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
173 1 : delete[] A;
174 1 : delete[] B;
175 1 : delete[] expected;
176 1 : delete[] result;
177 1 : }
178 :
179 4 : TEST(MatrixArrayTestI32, SubtractionPerformanceComparison) {
180 1 : int mtx_size = 1024;
181 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl;
182 : // define input matrices A and B
183 1 : int *A = new int[mtx_size * mtx_size];
184 1 : int *B = new int[mtx_size * mtx_size];
185 1 : int *expected = new int[mtx_size * mtx_size];
186 1 : int *result = new int[mtx_size * mtx_size];
187 :
188 : // initialize random number generator
189 1 : std::random_device rd;
190 1 : std::mt19937 gen(rd());
191 1 : std::uniform_int_distribution<int> distribution(1, 100);
192 :
193 : // populate matrices A and B with random values
194 1025 : for (int i = 0; i < mtx_size; ++i) {
195 1049600 : for (int j = 0; j < mtx_size; ++j) {
196 1048576 : A[i * mtx_size + j] = distribution(gen);
197 1048576 : B[i * mtx_size + j] = distribution(gen);
198 : }
199 : }
200 :
201 : gpmp::linalg::Mtx mtx;
202 1 : auto start_std = std::chrono::high_resolution_clock::now();
203 :
204 : // expected result using the naive implementation
205 1 : mtx.std_mtx_sub(A, B, expected, mtx_size, mtx_size);
206 :
207 1 : auto end_std = std::chrono::high_resolution_clock::now();
208 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
209 :
210 1 : auto start_intrin = std::chrono::high_resolution_clock::now();
211 :
212 : // result using the intrinsics implementation
213 1 : mtx.mtx_sub(A, B, result, mtx_size, mtx_size);
214 1 : auto end_intrin = std::chrono::high_resolution_clock::now();
215 : std::chrono::duration<double> elapsed_seconds_intrin =
216 1 : end_intrin - start_intrin;
217 :
218 1 : TEST_COUT << "INTRINSIC Matrix Subtraction Time : "
219 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl;
220 1 : TEST_COUT << "STANDARD Matrix Subtraction Time : "
221 1 : << elapsed_seconds_std.count() << " seconds" << std::endl;
222 :
223 : // compare the results
224 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
225 1 : delete[] A;
226 1 : delete[] B;
227 1 : delete[] expected;
228 1 : delete[] result;
229 1 : }
230 :
231 4 : TEST(MatrixArrayTestI32, MultiplicationPerformanceComparison) {
232 1 : int mtx_size = 1024;
233 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl;
234 : // define input matrices A and B
235 1 : int *A = new int[mtx_size * mtx_size];
236 1 : int *B = new int[mtx_size * mtx_size];
237 1 : int *expected = new int[mtx_size * mtx_size];
238 1 : int *result = new int[mtx_size * mtx_size];
239 :
240 : // initialize random number generator
241 1 : std::random_device rd;
242 1 : std::mt19937 gen(rd());
243 1 : std::uniform_int_distribution<int> distribution(1, 100);
244 :
245 : // populate matrices A and B with random values
246 1025 : for (int i = 0; i < mtx_size; ++i) {
247 1049600 : for (int j = 0; j < mtx_size; ++j) {
248 1048576 : A[i * mtx_size + j] = distribution(gen);
249 1048576 : B[i * mtx_size + j] = distribution(gen);
250 : }
251 : }
252 :
253 : gpmp::linalg::Mtx mtx;
254 1 : auto start_std = std::chrono::high_resolution_clock::now();
255 :
256 : // expected result using the naive implementation
257 1 : mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size);
258 :
259 1 : auto end_std = std::chrono::high_resolution_clock::now();
260 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
261 :
262 1 : auto start_intrin = std::chrono::high_resolution_clock::now();
263 :
264 : // result using the intrinsics implementation
265 1 : mtx.mtx_mult(A, B, result, mtx_size, mtx_size, mtx_size);
266 1 : auto end_intrin = std::chrono::high_resolution_clock::now();
267 : std::chrono::duration<double> elapsed_seconds_intrin =
268 1 : end_intrin - start_intrin;
269 :
270 1 : TEST_COUT << "INTRINSIC Matrix Multiplication Time : "
271 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl;
272 1 : TEST_COUT << "STANDARD Matrix Multiplication Time : "
273 1 : << elapsed_seconds_std.count() << " seconds" << std::endl;
274 :
275 : // compare the results
276 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
277 1 : delete[] A;
278 1 : delete[] B;
279 1 : delete[] expected;
280 1 : delete[] result;
281 1 : }
282 :
283 4 : TEST(MatrixArrayTestI32_I64, MultiplicationPerformanceComparison) {
284 1 : int mtx_size = 1024;
285 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl;
286 : // define input matrices A and B
287 1 : int *A = new int[mtx_size * mtx_size];
288 1 : int *B = new int[mtx_size * mtx_size];
289 1 : int64_t *expected = new int64_t[mtx_size * mtx_size];
290 1 : int64_t *result = new int64_t[mtx_size * mtx_size];
291 :
292 : // initialize random number generator
293 1 : std::random_device rd;
294 1 : std::mt19937 gen(rd());
295 1 : std::uniform_int_distribution<int> distribution(1, 100);
296 :
297 : // populate matrices A and B with random values
298 1025 : for (int i = 0; i < mtx_size; ++i) {
299 1049600 : for (int j = 0; j < mtx_size; ++j) {
300 1048576 : A[i * mtx_size + j] = distribution(gen);
301 1048576 : B[i * mtx_size + j] = distribution(gen);
302 : }
303 : }
304 :
305 : gpmp::linalg::Mtx mtx;
306 1 : auto start_std = std::chrono::high_resolution_clock::now();
307 :
308 : // expected result using the naive implementation
309 1 : mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size);
310 :
311 1 : auto end_std = std::chrono::high_resolution_clock::now();
312 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
313 :
314 1 : auto start_intrin = std::chrono::high_resolution_clock::now();
315 :
316 : // result using the intrinsics implementation
317 1 : mtx.mtx_mult(A, B, result, mtx_size, mtx_size, mtx_size);
318 1 : auto end_intrin = std::chrono::high_resolution_clock::now();
319 : std::chrono::duration<double> elapsed_seconds_intrin =
320 1 : end_intrin - start_intrin;
321 :
322 1 : TEST_COUT << "INTRINSIC Matrix Multiplication Time : "
323 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl;
324 1 : TEST_COUT << "STANDARD Matrix Multiplication Time : "
325 1 : << elapsed_seconds_std.count() << " seconds" << std::endl;
326 :
327 : // compare the results
328 1 : ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
329 1 : delete[] A;
330 1 : delete[] B;
331 1 : delete[] expected;
332 1 : delete[] result;
333 1 : }
334 :
335 4 : TEST(MatrixArrayTestI32, TransposePerformanceComparison) {
336 1 : int mtx_size = 1024;
337 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl;
338 : // define input matrices A and B
339 1 : int *A = new int[mtx_size * mtx_size];
340 1 : int *At_naive = new int[mtx_size * mtx_size];
341 1 : int *At_intrin = new int[mtx_size * mtx_size];
342 :
343 : // initialize random number generator
344 1 : std::random_device rd;
345 1 : std::mt19937 gen(rd());
346 1 : std::uniform_int_distribution<int> distribution(1, 100);
347 :
348 : // populate matrix A with random values
349 1025 : for (int i = 0; i < mtx_size; ++i) {
350 1049600 : for (int j = 0; j < mtx_size; ++j) {
351 1048576 : A[i * mtx_size + j] = distribution(gen);
352 : }
353 : }
354 :
355 : gpmp::linalg::Mtx mtx;
356 :
357 1 : auto start_std = std::chrono::high_resolution_clock::now();
358 :
359 : // Calculate transpose using the naive implementation
360 1 : mtx.std_mtx_tpose(A, At_naive, mtx_size, mtx_size);
361 :
362 1 : auto end_std = std::chrono::high_resolution_clock::now();
363 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
364 :
365 1 : auto start_intrin = std::chrono::high_resolution_clock::now();
366 :
367 : // Calculate transpose using the AVX2-accelerated implementation
368 1 : mtx.mtx_tpose(A, At_intrin, mtx_size, mtx_size);
369 :
370 1 : auto end_intrin = std::chrono::high_resolution_clock::now();
371 : std::chrono::duration<double> elapsed_seconds_intrin =
372 1 : end_intrin - start_intrin;
373 :
374 1 : TEST_COUT << "INTRINSIC Matrix Transpose Time : "
375 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl;
376 :
377 1 : TEST_COUT << "STANDARD Matrix Transpose Time : "
378 1 : << elapsed_seconds_std.count() << " seconds" << std::endl;
379 :
380 : // Compare the results
381 1 : ASSERT_TRUE(mtx_verif(At_naive, At_intrin, mtx_size, mtx_size));
382 :
383 1 : delete[] A;
384 1 : delete[] At_naive;
385 1 : delete[] At_intrin;
386 1 : }
387 :
388 : } // namespace
|