LCOV - code coverage report
Current view: top level - tests/linalg - t_dgemm_arr.cpp (source / functions) Hit Total Coverage
Test: lcov.info Lines: 38 39 97.4 %
Date: 2024-05-13 05:06:06 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #include "t_matrix.hpp"
       2             : #include <chrono>
       3             : #include <cmath>
       4             : #include <cstdint>
       5             : #include <gtest/gtest.h>
       6             : #include <iostream>
       7             : #include <limits.h>
       8             : #include <openGPMP/linalg/_dgemm.hpp>
       9             : #include <openGPMP/linalg/mtx.hpp>
      10             : #include <random>
      11             : #include <string>
      12             : #include <vector>
      13             : 
      14             : const double TOLERANCE = 1e-3;
      15             : 
      16             : using namespace gpmp;
      17             : #define TEST_COUT std::cerr << "\033[32m[          ] [ INFO ] \033[0m"
      18             : 
      19             : namespace {
      20           4 : TEST(GEMMArrayTest, DGEMMPerformanceComparison) {
      21           1 :     int mtx_size = 1024;
      22             : 
      23           1 :     TEST_COUT << "Matrix size      : " << mtx_size << std::endl;
      24             :     // define input matrices A and B
      25           1 :     double *A = new double[mtx_size * mtx_size];
      26           1 :     double *B = new double[mtx_size * mtx_size];
      27           1 :     double *expected = new double[mtx_size * mtx_size];
      28           1 :     double *result = new double[mtx_size * mtx_size];
      29             : 
      30             :     // initialize random number generator
      31           1 :     std::random_device rd;
      32           1 :     std::mt19937 gen(rd());
      33           1 :     std::uniform_real_distribution<double> distribution(1.0, 100.0);
      34             : 
      35             :     // populate matrices A and B with random values
      36        1025 :     for (int i = 0; i < mtx_size; ++i) {
      37     1049600 :         for (int j = 0; j < mtx_size; ++j) {
      38     1048576 :             A[i * mtx_size + j] = distribution(gen);
      39     1048576 :             B[i * mtx_size + j] = distribution(gen);
      40             :         }
      41             :     }
      42             : 
      43             :     gpmp::linalg::Mtx mtx;
      44             :     gpmp::linalg::DGEMM dgemm;
      45           1 :     auto start_std = std::chrono::high_resolution_clock::now();
      46             : 
      47             :     // expected result using the naive implementation
      48           1 :     mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size);
      49             : 
      50           1 :     auto end_std = std::chrono::high_resolution_clock::now();
      51           1 :     std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;
      52             : 
      53           1 :     auto start_intrin = std::chrono::high_resolution_clock::now();
      54             : 
      55             :     // result using the intrinsics implementation
      56           1 :     dgemm.dgemm_nn(mtx_size,
      57             :                    mtx_size,
      58             :                    mtx_size,
      59             :                    1.0,
      60             :                    A,
      61             :                    mtx_size,
      62             :                    1,
      63             :                    B,
      64             :                    mtx_size,
      65             :                    1,
      66             :                    0.0,
      67             :                    result,
      68             :                    mtx_size,
      69             :                    1);
      70             : 
      71           1 :     auto end_intrin = std::chrono::high_resolution_clock::now();
      72             :     std::chrono::duration<double> elapsed_seconds_intrin =
      73           1 :         end_intrin - start_intrin;
      74             : 
      75           1 :     TEST_COUT << "ROUTINE   Matrix Multiplication Time      : "
      76           1 :               << elapsed_seconds_intrin.count() << " seconds" << std::endl;
      77           1 :     TEST_COUT << "STANDARD  Matrix Multiplication Time      : "
      78           1 :               << elapsed_seconds_std.count() << " seconds" << std::endl;
      79             : 
      80             :     // compare the results
      81             :     /*for (int i = 0; i < mtx_size; i++) {
      82             :         for (int j = 0; j < mtx_size; j++) {
      83             :             EXPECT_NEAR(expected[i * mtx_size + j],
      84             :                         result[i * mtx_size + j],
      85             :                         TOLERANCE);
      86             :         }
      87             :     }*/
      88             : 
      89           1 :     int count = 0;
      90           1 :     int matches = 0;
      91             :     int i;
      92             : 
      93     1048577 :     for (i = 0; i < mtx_size * mtx_size; ++i) {
      94             :         // printf("Comparing element at index %d: %.2f vs %.2f\n", i,
      95             :         // expected[i], result[i]);
      96     1048576 :         if (expected[i] != result[i]) {
      97           0 :             count++;
      98             :         } else {
      99     1048576 :             matches++;
     100             :         }
     101             :     }
     102           1 :     printf("MISMATCHES  / TOTAL : %d/%d\n", count, i);
     103           1 :     printf("MATCHES     / TOTAL : %d/%d\n", matches, i);
     104             : 
     105             :     // ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
     106             : 
     107           1 :     delete[] A;
     108           1 :     delete[] B;
     109           1 :     delete[] expected;
     110           1 :     delete[] result;
     111           1 : }
     112             : 
     113             : } // namespace

Generated by: LCOV version 1.14