Line data Source code
1 : #include "t_matrix.hpp" 2 : #include <chrono> 3 : #include <cmath> 4 : #include <cstdint> 5 : #include <gtest/gtest.h> 6 : #include <iostream> 7 : #include <limits.h> 8 : #include <openGPMP/linalg/_sgemm.hpp> 9 : #include <openGPMP/linalg/mtx.hpp> 10 : #include <random> 11 : #include <string> 12 : #include <vector> 13 : 14 : const double TOLERANCE = 1e-2; 15 : 16 : using namespace gpmp; 17 : #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" 18 : 19 : namespace { 20 4 : TEST(GEMMArrayTest, SGEMMPerformanceComparison) { 21 1 : int mtx_size = 1024; 22 1 : TEST_COUT << "Matrix size : " << mtx_size << std::endl; 23 : // define input matrices A and B 24 1 : float *A = new float[mtx_size * mtx_size]; 25 1 : float *B = new float[mtx_size * mtx_size]; 26 1 : float *expected = new float[mtx_size * mtx_size]; 27 1 : float *result = new float[mtx_size * mtx_size]; 28 : 29 : // initialize random number generator 30 1 : std::random_device rd; 31 1 : std::mt19937 gen(rd()); 32 1 : std::uniform_real_distribution<float> distribution(1.0, 100.0); 33 : 34 : // populate matrices A and B with random values 35 1025 : for (int i = 0; i < mtx_size; ++i) { 36 1049600 : for (int j = 0; j < mtx_size; ++j) { 37 1048576 : A[i * mtx_size + j] = distribution(gen); 38 1048576 : B[i * mtx_size + j] = distribution(gen); 39 : } 40 : } 41 : 42 : gpmp::linalg::Mtx mtx; 43 : gpmp::linalg::SGEMM sgemm; 44 1 : auto start_std = std::chrono::high_resolution_clock::now(); 45 : 46 : // expected result using the naive implementation 47 1 : mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size); 48 : 49 1 : auto end_std = std::chrono::high_resolution_clock::now(); 50 1 : std::chrono::duration<double> elapsed_seconds_std = end_std - start_std; 51 : 52 1 : auto start_intrin = std::chrono::high_resolution_clock::now(); 53 : 54 : // result using the intrinsics implementation 55 1 : sgemm.sgemm_nn(mtx_size, 56 : mtx_size, 57 : mtx_size, 58 : 1.0, 59 : A, 60 : mtx_size, 61 : 1, 62 : B, 63 : mtx_size, 64 : 1, 65 : 0.0, 66 : result, 67 : mtx_size, 68 : 1); 69 : 70 1 : auto end_intrin = std::chrono::high_resolution_clock::now(); 71 : std::chrono::duration<double> elapsed_seconds_intrin = 72 1 : end_intrin - start_intrin; 73 : 74 1 : TEST_COUT << "ROUTINE Matrix Multiplication Time : " 75 1 : << elapsed_seconds_intrin.count() << " seconds" << std::endl; 76 1 : TEST_COUT << "STANDARD Matrix Multiplication Time : " 77 1 : << elapsed_seconds_std.count() << " seconds" << std::endl; 78 : 79 : // compare the results 80 1025 : for (int i = 0; i < mtx_size; i++) { 81 1049600 : for (int j = 0; j < mtx_size; j++) { 82 : // EXPECT_NEAR(expected[i * mtx_size + j], 83 : // result[i * mtx_size + j], 84 : // TOLERANCE); 85 : } 86 : } 87 : // ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size)); 88 : 89 1 : delete[] A; 90 1 : delete[] B; 91 1 : delete[] expected; 92 1 : delete[] result; 93 1 : } 94 : 95 : } // namespace