40 #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64) 
   47 #elif defined(__SSE2__) 
   49 #include <emmintrin.h> 
   50 #include <smmintrin.h> 
   58 void gpmp::linalg::Mtx::mtx_add(
const double *
A,
 
   64         for (
int i = 0; i < 
rows; ++i) {
 
   67             for (; j < 
cols - 1; j += 2) {
 
   69                 __m128d a = _mm_loadu_pd(&
A[i * 
cols + j]);
 
   70                 __m128d b = _mm_loadu_pd(&
B[i * 
cols + j]);
 
   71                 __m128d c = _mm_loadu_pd(&
C[i * 
cols + j]);
 
   76                 _mm_storeu_pd(&
C[i * 
cols + j], c);
 
   80             for (; j < 
cols; ++j) {
 
void std_mtx_add(const T *A, const T *B, T *C, int rows, int cols)
Performs matrix addition on two matrices stored as flat arrays.