40 #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64)
50 #include <immintrin.h>
// Fragment of an element-wise addition over vectors of double. The line that
// names the function and declares its first parameter is not visible here
// (presumably `vec1`, given its use below -- TODO confirm).
59 const std::vector<double> &vec2,
60 std::vector<double> &result) {
// The element count comes from vec1 alone; no visible line checks that vec2
// and result hold at least that many elements.
61 const size_t size = vec1.size();
// Raw pointers into the three vectors' storage, used by the loops below.
62 const double *data1 = vec1.data();
63 const double *data2 = vec2.data();
64 double *result_data = result.data();
// Four-wide loop: each iteration processes elements i .. i+3.
// NOTE(review): `size` is an unsigned size_t, so `size - 3` wraps around to a
// very large value whenever size < 3. The declaration of `i` is not visible
// here; if it starts at 0, this loop would then read and write past the end of
// the buffers. A bound such as `i + 3 < size` would avoid the wrap -- confirm.
70 for (; i < size - 3; i += 4) {
// Unaligned loads of four doubles from each input.
72 __m256d a = _mm256_loadu_pd(data1 + i);
73 __m256d b = _mm256_loadu_pd(data2 + i);
// Lane-wise sum of the two registers.
75 __m256d c = _mm256_add_pd(a, b);
// Unaligned store of the four sums into the output buffer.
78 _mm256_storeu_pd(result_data + i, c);
// Scalar loop that continues from wherever the four-wide loop left `i`.
82 for (; i < size; ++i) {
83 result_data[i] = data1[i] + data2[i];
// Plain scalar loop over every element. Presumably the alternative build path
// used when the intrinsics are unavailable; the preprocessor lines separating
// it from the code above are not visible -- TODO confirm.
86 for (
size_t i = 0; i < size; ++i) {
87 result_data[i] = data1[i] + data2[i];
// Fragment of an element-wise subtraction (vec1 - vec2) over vectors of
// double. The line that names the function and declares its first parameter is
// not visible here (presumably `vec1` -- TODO confirm).
94 const std::vector<double> &vec2,
95 std::vector<double> &result) {
// NOTE(review): size() returns an unsigned size type; storing it in an int
// narrows the value -- confirm inputs never exceed INT_MAX elements.
96 const int vecSize = vec1.size();
// Number of trailing elements that do not fill a complete group of four.
97 const int remainder = vecSize % 4;
// vecSize rounded down to a multiple of four: the bound for the four-wide loop.
98 const int vecSizeAligned = vecSize - remainder;
// Four-wide loop over the part of the input that forms complete groups.
101 for (
int i = 0; i < vecSizeAligned; i += 4) {
// Unaligned loads of four doubles from each input.
102 __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
103 __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
// Lane-wise difference vec1 - vec2.
104 __m256d sub = _mm256_sub_pd(vec1Data, vec2Data);
// Unaligned store of the four differences into result.
105 _mm256_storeu_pd(&result[i], sub);
// Scalar loop for the remaining elements past the last complete group of four.
109 for (
int i = vecSizeAligned; i < vecSize; ++i) {
110 result[i] = vec1[i] - vec2[i];
// Plain scalar loop over every element. Presumably the alternative build path
// used when the intrinsics are unavailable; the separating preprocessor lines
// are not visible -- TODO confirm.
113 for (
int i = 0; i < vecSize; ++i) {
114 result[i] = vec1[i] - vec2[i];
// Fragment of a routine that multiplies every element of a vector of double by
// a scalar. The lines declaring the function name and the `vec` and `scalar`
// parameters are not visible here -- TODO confirm their types.
121 std::vector<double> &result) {
// NOTE(review): size() returns an unsigned size type; storing it in an int
// narrows the value -- confirm inputs never exceed INT_MAX elements.
122 const int vecSize = vec.size();
// Number of trailing elements that do not fill a complete group of four.
123 const int remainder = vecSize % 4;
// vecSize rounded down to a multiple of four: the bound for the four-wide loop.
124 const int vecSizeAligned = vecSize - remainder;
// Broadcast the scalar into all four lanes once, outside the loop.
127 __m256d scalarVector = _mm256_set1_pd(scalar);
// Four-wide loop over the part of the input that forms complete groups.
130 for (
int i = 0; i < vecSizeAligned; i += 4) {
// Unaligned load of four doubles from the input.
132 __m256d vecData = _mm256_loadu_pd(&vec[i]);
// Lane-wise product with the broadcast scalar.
135 __m256d resultData = _mm256_mul_pd(vecData, scalarVector);
// Unaligned store of the four products into result.
138 _mm256_storeu_pd(&result[i], resultData);
// Scalar loop for the remaining elements past the last complete group of four.
142 for (
int i = vecSizeAligned; i < vecSize; ++i) {
143 result[i] = vec[i] * scalar;
// Fragment of a dot-product routine over vectors of double. The line that
// names the function and declares its first parameter is not visible here
// (presumably `vec1` -- TODO confirm), and neither is the return type.
148 const std::vector<double> &vec2) {
// NOTE(review): size() returns an unsigned size type; storing it in an int
// narrows the value -- confirm inputs never exceed INT_MAX elements.
149 const int vecSize = vec1.size();
// Number of trailing elements that do not fill a complete group of four.
150 const int remainder = vecSize % 4;
// vecSize rounded down to a multiple of four: the bound for the four-wide loop.
151 const int vecSizeAligned = vecSize - remainder;
// Four-lane accumulator, initialised to zero.
154 __m256d dotProduct = _mm256_setzero_pd();
// Four-wide loop over the part of the input that forms complete groups.
157 for (
int i = 0; i < vecSizeAligned; i += 4) {
// Unaligned loads of four doubles from each input.
159 __m256d vec1Data = _mm256_loadu_pd(&vec1[i]);
160 __m256d vec2Data = _mm256_loadu_pd(&vec2[i]);
// Lane-wise product added to the accumulator. The start of this statement is
// not visible here; presumably the sum is assigned back to dotProduct --
// TODO confirm.
164 _mm256_add_pd(dotProduct, _mm256_mul_pd(vec1Data, vec2Data));
// 32-byte-aligned scratch array, as required by the aligned store below, which
// copies the four accumulator lanes into it.
169 alignas(32)
double temp[4];
170 _mm256_store_pd(temp, dotProduct);
// Loop over the four lanes held in temp. Its body is not visible here;
// presumably it sums them into `result` -- TODO confirm.
171 for (
int i = 0; i < 4; ++i) {
// Scalar loop adding the products of the remaining elements into `result`
// (the declaration of `result` is not visible here).
176 for (
int i = vecSizeAligned; i < vecSize; ++i) {
177 result += vec1[i] * vec2[i];
int dot_product(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2)
Computes the dot product for vectors of signed 8-bit integers.
void vector_sub(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2, std::vector< int8_t > &result)
Performs vector subtraction for vectors of signed 8-bit integers.
void scalar_mult(const std::vector< int8_t > &vec1, int scalar, std::vector< int8_t > &result)
Performs scalar multiplication for vectors of signed 8-bit integers.
void vector_add(const std::vector< int8_t > &vec1, const std::vector< int8_t > &vec2, std::vector< int8_t > &result)
Performs vector addition for vectors of signed 8-bit integers.