Line data Source code
1 : #include "t_matrix.hpp" 2 : #include <chrono> 3 : #include <cmath> 4 : #include <cstdint> 5 : #include <gtest/gtest.h> 6 : #include <iostream> 7 : #include <limits.h> 8 : #include <openGPMP/linalg/_igemm.hpp> 9 : #include <openGPMP/linalg/mtx.hpp> 10 : #include <random> 11 : #include <string> 12 : #include <vector> 13 : 14 : const double TOLERANCE = 1e-3; 15 : 16 : using namespace gpmp; 17 : #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" 18 : #define INFO_COUT \ 19 : std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" 20 : 21 : namespace { 22 4 : TEST(GEMMArrayTest, IGEMMPerformanceComparison) { 23 1 : INFO_COUT << "GEMM ROUTINES" << std::endl; 24 1 : int mtx_size = 1024; 25 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl; 26 : // define input matrices A and B 27 1 : int *A = new int[mtx_size * mtx_size]; 28 1 : int *B = new int[mtx_size * mtx_size]; 29 1 : int *expected = new int[mtx_size * mtx_size]; 30 1 : int *result = new int[mtx_size * mtx_size]; 31 : 32 : // initialize random number generator 33 1 : std::random_device rd; 34 1 : std::mt19937 gen(rd()); 35 1 : std::uniform_int_distribution<int> distribution(1.0, 100.0); 36 : 37 : // populate matrices A and B with random values 38 1025 : for (int i = 0; i < mtx_size; ++i) { 39 1049600 : for (int j = 0; j < mtx_size; ++j) { 40 1048576 : A[i * mtx_size + j] = distribution(gen); 41 1048576 : B[i * mtx_size + j] = distribution(gen); 42 : } 43 : } 44 : 45 : gpmp::linalg::Mtx mtx; 46 : gpmp::linalg::IGEMM igemm; 47 1 : auto start_std = std::chrono::high_resolution_clock::now(); 48 : 49 : // expected result using the naive implementation 50 1 : mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size); 51 : 52 1 : auto end_std = std::chrono::high_resolution_clock::now(); 53 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std; 54 : 55 1 : auto start_intrin = std::chrono::high_resolution_clock::now(); 56 : 57 : // result using the intrinsics implementation 58 1 : igemm.igemm_nn(mtx_size, 59 : mtx_size, 60 : mtx_size, 61 : 1.0, 62 : A, 63 : mtx_size, 64 : 1, 65 : B, 66 : mtx_size, 67 : 1, 68 : 0.0, 69 : result, 70 : mtx_size, 71 : 1); 72 : 73 1 : auto end_intrin = std::chrono::high_resolution_clock::now(); 74 : std::chrono::duration<double> elapsed_seconds_intrin = 75 1 : end_intrin - start_intrin; 76 : 77 1 : TEST_COUT << "ROUTINE Matrix Multiplication Time : " 78 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl; 79 1 : TEST_COUT << "STANDARD Matrix Multiplication Time : " 80 1 : << elapsed_seconds_std.count() << " seconds" << std::endl; 81 : 82 : // compare the results 83 1025 : for (int i = 0; i < mtx_size; i++) { 84 1049600 : for (int j = 0; j < mtx_size; j++) { 85 1048576 : EXPECT_EQ(expected[i * mtx_size + j], result[i * mtx_size + j]); 86 : } 87 : } 88 : // ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size)); 89 : 90 1 : delete[] A; 91 1 : delete[] B; 92 1 : delete[] expected; 93 1 : delete[] result; 94 1 : } 95 : 96 : } // namespace