40 #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64)
50 #include <immintrin.h>
58 void gpmp::linalg::Mtx::mtx_add(
const std::vector<std::vector<float>> &
A,
59 const std::vector<std::vector<float>> &
B,
60 std::vector<std::vector<float>> &
C) {
61 const int rows =
A.size();
62 const int cols =
A[0].size();
65 for (
int i = 0; i <
rows; ++i) {
68 for (; j <
cols - 7; j += 8) {
70 __m256 a = _mm256_loadu_ps(&
A[i][j]);
71 __m256 b = _mm256_loadu_ps(&
B[i][j]);
72 __m256 c = _mm256_loadu_ps(&
C[i][j]);
75 c = _mm256_add_ps(a, b);
78 _mm256_storeu_ps(&
C[i][j], c);
82 for (; j <
cols; ++j) {
83 C[i][j] =
A[i][j] +
B[i][j];
void std_mtx_add(const T *A, const T *B, T *C, int rows, int cols)
Perform matrix addition on two matrices as flat arrays.