LCOV - code coverage report
Current view: top level - tests/linalg - t_sgemm_arr.cpp (source / functions) Hit Total Coverage
Test: lcov.info Lines: 33 33 100.0 %
Date: 2024-05-13 05:06:06 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #include "t_matrix.hpp"
       2             : #include <chrono>
       3             : #include <cmath>
       4             : #include <cstdint>
       5             : #include <gtest/gtest.h>
       6             : #include <iostream>
       7             : #include <limits.h>
       8             : #include <openGPMP/linalg/_sgemm.hpp>
       9             : #include <openGPMP/linalg/mtx.hpp>
      10             : #include <random>
      11             : #include <string>
      12             : #include <vector>
      13             : 
      14             : const double TOLERANCE = 1e-2;
      15             : 
      16             : using namespace gpmp;
      17             : #define TEST_COUT std::cerr << "\033[32m[          ] [ INFO ] \033[0m"
      18             : 
      19             : namespace {
      20           4 : TEST(GEMMArrayTest, SGEMMPerformanceComparison) {
      21           1 :     int mtx_size = 1024;
      22           1 :     TEST_COUT << "Matrix size      : " << mtx_size << std::endl;
      23             :     // define input matrices A and B
      24           1 :     float *A = new float[mtx_size * mtx_size];
      25           1 :     float *B = new float[mtx_size * mtx_size];
      26           1 :     float *expected = new float[mtx_size * mtx_size];
      27           1 :     float *result = new float[mtx_size * mtx_size];
      28             : 
      29             :     // initialize random number generator
      30           1 :     std::random_device rd;
      31           1 :     std::mt19937 gen(rd());
      32           1 :     std::uniform_real_distribution<float> distribution(1.0, 100.0);
      33             : 
      34             :     // populate matrices A and B with random values
      35        1025 :     for (int i = 0; i < mtx_size; ++i) {
      36     1049600 :         for (int j = 0; j < mtx_size; ++j) {
      37     1048576 :             A[i * mtx_size + j] = distribution(gen);
      38     1048576 :             B[i * mtx_size + j] = distribution(gen);
      39             :         }
      40             :     }
      41             : 
      42             :     gpmp::linalg::Mtx mtx;
      43             :     gpmp::linalg::SGEMM sgemm;
      44           1 :     auto start_std = std::chrono::high_resolution_clock::now();
      45             : 
      46             :     // expected result using the naive implementation
      47           1 :     mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size);
      48             : 
      49           1 :     auto end_std = std::chrono::high_resolution_clock::now();
      50           1 :     std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
      51             : 
      52           1 :     auto start_intrin = std::chrono::high_resolution_clock::now();
      53             : 
      54             :     // result using the intrinsics implementation
      55           1 :     sgemm.sgemm_nn(mtx_size,
      56             :                    mtx_size,
      57             :                    mtx_size,
      58             :                    1.0,
      59             :                    A,
      60             :                    mtx_size,
      61             :                    1,
      62             :                    B,
      63             :                    mtx_size,
      64             :                    1,
      65             :                    0.0,
      66             :                    result,
      67             :                    mtx_size,
      68             :                    1);
      69             : 
      70           1 :     auto end_intrin = std::chrono::high_resolution_clock::now();
      71             :     std::chrono::duration<double> elapsed_seconds_intrin =
      72           1 :         end_intrin - start_intrin;
      73             : 
      74           1 :     TEST_COUT << "ROUTINE   Matrix Multiplication Time      : "
      75           1 :               << elapsed_seconds_intrin.count() << " seconds" << std::endl;
      76           1 :     TEST_COUT << "STANDARD  Matrix Multiplication Time      : "
      77           1 :               << elapsed_seconds_std.count() << " seconds" << std::endl;
      78             : 
      79             :     // compare the results
      80        1025 :     for (int i = 0; i < mtx_size; i++) {
      81     1049600 :         for (int j = 0; j < mtx_size; j++) {
      82             :             // EXPECT_NEAR(expected[i * mtx_size + j],
      83             :             //             result[i * mtx_size + j],
      84             :             //             TOLERANCE);
      85             :         }
      86             :     }
      87             :     // ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
      88             : 
      89           1 :     delete[] A;
      90           1 :     delete[] B;
      91           1 :     delete[] expected;
      92           1 :     delete[] result;
      93           1 : }
      94             : 
      95             : } // namespace

Generated by: LCOV version 1.14