openGPMP
Open Source Mathematics Package
vector_avx2_f64.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 #include <cmath>
34 #include <iostream>
35 #include <numeric>
37 #include <stdexcept>
38 #include <vector>
39 
40 #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64)
41 
42 /************************************************************************
43  *
44  * Vector Operations for AVX ISA
45  *
46  ************************************************************************/
47 #if defined(__AVX2__)
48 
49 // AVX family intrinsics
50 #include <immintrin.h>
51 
52 /************************************************************************
53  *
54  * Vector Operations on Vectors
55  *
56  ************************************************************************/
57 
58 void gpmp::linalg::vector_add(const std::vector<double> &vec1,
59  const std::vector<double> &vec2,
60  std::vector<double> &result) {
61  const size_t size = vec1.size();
62  const double *data1 = vec1.data();
63  const double *data2 = vec2.data();
64  double *result_data = result.data();
65 
66  if (size > 4) {
67  size_t i = 0;
68  // perform vectorized addition as long as there are at least 4 elements
69  // remaining
70  for (; i < size - 3; i += 4) {
71  // Load 4 elements from vec1 and vec2
72  __m256d a = _mm256_loadu_pd(data1 + i);
73  __m256d b = _mm256_loadu_pd(data2 + i);
74 
75  __m256d c = _mm256_add_pd(a, b);
76 
77  // Store the result back to result vector
78  _mm256_storeu_pd(result_data + i, c);
79  }
80 
81  // Perform standard addition on the remaining elements
82  for (; i < size; ++i) {
83  result_data[i] = data1[i] + data2[i];
84  }
85  } else { // If size is not greater than 4, perform standard addition
86  for (size_t i = 0; i < size; ++i) {
87  result_data[i] = data1[i] + data2[i];
88  }
89  }
90 }
91 
92 // Vector subtraction using AVX2 intrinsics, operates on double types
93 void gpmp::linalg::vector_sub(const std::vector<double> &vec1,
94  const std::vector<double> &vec2,
95  std::vector<double> &result) {
96  const int vecSize = vec1.size();
97  const int remainder = vecSize % 4;
98  const int vecSizeAligned = vecSize - remainder;
99 
100  if (vecSize > 4) {
101  for (int i = 0; i < vecSizeAligned; i += 4) {
102  __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
103  __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
104  __m256d sub = _mm256_sub_pd(vec1Data, vec2Data);
105  _mm256_storeu_pd(&result[i], sub);
106  }
107 
108  // Perform standard subtraction on the remaining elements
109  for (int i = vecSizeAligned; i < vecSize; ++i) {
110  result[i] = vec1[i] - vec2[i];
111  }
112  } else {
113  for (int i = 0; i < vecSize; ++i) {
114  result[i] = vec1[i] - vec2[i];
115  }
116  }
117 }
118 
119 void gpmp::linalg::scalar_mult(const std::vector<double> &vec,
120  double scalar,
121  std::vector<double> &result) {
122  const int vecSize = vec.size();
123  const int remainder = vecSize % 4;
124  const int vecSizeAligned = vecSize - remainder;
125 
126  // Set the scalar value into a vector register
127  __m256d scalarVector = _mm256_set1_pd(scalar);
128 
129  // Process aligned part of the vector
130  for (int i = 0; i < vecSizeAligned; i += 4) {
131  // Load 4 double elements from the vector into a AVX register
132  __m256d vecData = _mm256_loadu_pd(&vec[i]);
133 
134  // Multiply the elements with the scalar
135  __m256d resultData = _mm256_mul_pd(vecData, scalarVector);
136 
137  // Store the result back to the result vector
138  _mm256_storeu_pd(&result[i], resultData);
139  }
140 
141  // Process the remaining part of the vector
142  for (int i = vecSizeAligned; i < vecSize; ++i) {
143  result[i] = vec[i] * scalar;
144  }
145 }
146 
147 double gpmp::linalg::dot_product(const std::vector<double> &vec1,
148  const std::vector<double> &vec2) {
149  const int vecSize = vec1.size();
150  const int remainder = vecSize % 4;
151  const int vecSizeAligned = vecSize - remainder;
152 
153  // Initialize the result to zero
154  __m256d dotProduct = _mm256_setzero_pd();
155 
156  // Process aligned part of the vectors
157  for (int i = 0; i < vecSizeAligned; i += 4) {
158  // Load 4 double elements from each vector into AVX registers
159  __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
160  __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
161 
162  // Multiply the elements pairwise and accumulate the result
163  dotProduct =
164  _mm256_add_pd(dotProduct, _mm256_mul_pd(vec1Data, vec2Data));
165  }
166 
167  // Sum up the elements in the dotProduct register
168  double result = 0.0;
169  alignas(32) double temp[4];
170  _mm256_store_pd(temp, dotProduct);
171  for (int i = 0; i < 4; ++i) {
172  result += temp[i];
173  }
174 
175  // Process the remaining part of the vectors
176  for (int i = vecSizeAligned; i < vecSize; ++i) {
177  result += vec1[i] * vec2[i];
178  }
179 
180  return result;
181 }
182 
183 #endif
184 
185 #endif
int dot_product(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2)
Computes the dot product for vectors of signed 8-bit integers.
void vector_sub(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2, std::vector< int8_t > &result)
Performs vector subtraction for vectors of signed 8-bit integers.
void scalar_mult(const std::vector< int8_t > &vec1, int scalar, std::vector< int8_t > &result)
Performs scalar multiplication for vectors of signed 8-bit integers.
void vector_add(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2, std::vector< int8_t > &result)
Performs vector addition for vectors of signed 8-bit integers.