LCOV - code coverage report
Current view: top level - modules/linalg/avx - vector_avx2_f64.cpp (source / functions) Hit Total Coverage
Test: lcov.info Lines: 57 59 96.6 %
Date: 2024-05-13 05:06:06 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*************************************************************************
       2             :  *
       3             :  *  Project
       4             :  *                         _____ _____  __  __ _____
       5             :  *                        / ____|  __ \|  \/  |  __ \
       6             :  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
       7             :  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
       8             :  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
       9             :  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
      10             :  *      | |
      11             :  *      |_|
      12             :  *
      13             :  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
      14             :  *
      15             :  * This software is licensed as described in the file LICENSE, which
      16             :  * you should have received as part of this distribution. The terms
      17             :  * among other details are referenced in the official documentation
      18             :  * seen here : https://akielaries.github.io/openGPMP/ along with
      19             :  * important files seen in this project.
      20             :  *
      21             :  * You may opt to use, copy, modify, merge, publish, distribute
      22             :  * and/or sell copies of the Software, and permit persons to whom
      23             :  * the Software is furnished to do so, under the terms of the
      24             :  * LICENSE file. As this is an Open Source effort, all implementations
      25             :  * must be of the same methodology.
      26             :  *
      27             :  *
      28             :  *
      29             :  * This software is distributed on an AS IS basis, WITHOUT
      30             :  * WARRANTY OF ANY KIND, either express or implied.
      31             :  *
      32             :  ************************************************************************/
      33             : #include <cmath>
      34             : #include <iostream>
      35             : #include <numeric>
      36             : #include <openGPMP/linalg/vector.hpp>
      37             : #include <stdexcept>
      38             : #include <vector>
      39             : 
      40             : #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64)
      41             : 
      42             : /************************************************************************
      43             :  *
      44             :  * Vector Operations for AVX ISA
      45             :  *
      46             :  ************************************************************************/
      47             : #if defined(__AVX2__)
      48             : 
      49             : // AVX family intrinsics
      50             : #include <immintrin.h>
      51             : 
      52             : /************************************************************************
      53             :  *
      54             :  * Vector Operations on Vectors
      55             :  *
      56             :  ************************************************************************/
      57             : 
      58           4 : void gpmp::linalg::vector_add(const std::vector<double> &vec1,
      59             :                               const std::vector<double> &vec2,
      60             :                               std::vector<double> &result) {
      61           4 :     const size_t size = vec1.size();
      62           4 :     const double *data1 = vec1.data();
      63           4 :     const double *data2 = vec2.data();
      64           4 :     double *result_data = result.data();
      65             : 
      66           4 :     if (size > 4) {
      67           2 :         size_t i = 0;
      68             :         // perform vectorized addition as long as there are at least 4 elements
      69             :         // remaining
      70     2778057 :         for (; i < size - 3; i += 4) {
      71             :             // Load 4 elements from vec1 and vec2
      72     2778055 :             __m256d a = _mm256_loadu_pd(data1 + i);
      73     5556110 :             __m256d b = _mm256_loadu_pd(data2 + i);
      74             : 
      75     2778055 :             __m256d c = _mm256_add_pd(a, b);
      76             : 
      77             :             // Store the result back to result vector
      78     2778055 :             _mm256_storeu_pd(result_data + i, c);
      79             :         }
      80             : 
      81             :         // Perform standard addition on the remaining elements
      82           4 :         for (; i < size; ++i) {
      83           2 :             result_data[i] = data1[i] + data2[i];
      84             :         }
      85             :     } else { // If size is not greater than 4, perform standard addition
      86          10 :         for (size_t i = 0; i < size; ++i) {
      87           8 :             result_data[i] = data1[i] + data2[i];
      88             :         }
      89             :     }
      90           4 : }
      91             : 
      92             : // Vector subtraction using AVX2 intrinsics, operates on double types
      93           4 : void gpmp::linalg::vector_sub(const std::vector<double> &vec1,
      94             :                               const std::vector<double> &vec2,
      95             :                               std::vector<double> &result) {
      96           4 :     const int vecSize = vec1.size();
      97           4 :     const int remainder = vecSize % 4;
      98           4 :     const int vecSizeAligned = vecSize - remainder;
      99             : 
     100           4 :     if (vecSize > 4) {
     101     2778063 :         for (int i = 0; i < vecSizeAligned; i += 4) {
     102     2778059 :             __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
     103     5556118 :             __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
     104     2778059 :             __m256d sub = _mm256_sub_pd(vec1Data, vec2Data);
     105     2778059 :             _mm256_storeu_pd(&result[i], sub);
     106             :         }
     107             : 
     108             :         // Perform standard subtraction on the remaining elements
     109           6 :         for (int i = vecSizeAligned; i < vecSize; ++i) {
     110           2 :             result[i] = vec1[i] - vec2[i];
     111             :         }
     112             :     } else {
     113           0 :         for (int i = 0; i < vecSize; ++i) {
     114           0 :             result[i] = vec1[i] - vec2[i];
     115             :         }
     116             :     }
     117           4 : }
     118             : 
     119           4 : void gpmp::linalg::scalar_mult(const std::vector<double> &vec,
     120             :                                double scalar,
     121             :                                std::vector<double> &result) {
     122           4 :     const int vecSize = vec.size();
     123           4 :     const int remainder = vecSize % 4;
     124           4 :     const int vecSizeAligned = vecSize - remainder;
     125             : 
     126             :     // Set the scalar value into a vector register
     127           4 :     __m256d scalarVector = _mm256_set1_pd(scalar);
     128             : 
     129             :     // Process aligned part of the vector
     130     2778061 :     for (int i = 0; i < vecSizeAligned; i += 4) {
     131             :         // Load 4 double elements from the vector into a AVX register
     132     5556114 :         __m256d vecData = _mm256_loadu_pd(&vec[i]);
     133             : 
     134             :         // Multiply the elements with the scalar
     135     2778057 :         __m256d resultData = _mm256_mul_pd(vecData, scalarVector);
     136             : 
     137             :         // Store the result back to the result vector
     138     2778057 :         _mm256_storeu_pd(&result[i], resultData);
     139             :     }
     140             : 
     141             :     // Process the remaining part of the vector
     142           6 :     for (int i = vecSizeAligned; i < vecSize; ++i) {
     143           2 :         result[i] = vec[i] * scalar;
     144             :     }
     145           4 : }
     146             : 
     147           3 : double gpmp::linalg::dot_product(const std::vector<double> &vec1,
     148             :                                  const std::vector<double> &vec2) {
     149           3 :     const int vecSize = vec1.size();
     150           3 :     const int remainder = vecSize % 4;
     151           3 :     const int vecSizeAligned = vecSize - remainder;
     152             : 
     153             :     // Initialize the result to zero
     154           3 :     __m256d dotProduct = _mm256_setzero_pd();
     155             : 
     156             :     // Process aligned part of the vectors
     157     2778059 :     for (int i = 0; i < vecSizeAligned; i += 4) {
     158             :         // Load 4 double elements from each vector into AVX registers
     159     2778056 :         __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
     160     5556112 :         __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
     161             : 
     162             :         // Multiply the elements pairwise and accumulate the result
     163             :         dotProduct =
     164     5556112 :             _mm256_add_pd(dotProduct, _mm256_mul_pd(vec1Data, vec2Data));
     165             :     }
     166             : 
     167             :     // Sum up the elements in the dotProduct register
     168           3 :     double result = 0.0;
     169             :     alignas(32) double temp[4];
     170             :     _mm256_store_pd(temp, dotProduct);
     171          15 :     for (int i = 0; i < 4; ++i) {
     172          12 :         result += temp[i];
     173             :     }
     174             : 
     175             :     // Process the remaining part of the vectors
     176           5 :     for (int i = vecSizeAligned; i < vecSize; ++i) {
     177           2 :         result += vec1[i] * vec2[i];
     178             :     }
     179             : 
     180           3 :     return result;
     181             : }
     182             : 
     183             : #endif
     184             : 
     185             : #endif

Generated by: LCOV version 1.14