40 #if defined(__x86_64__) || defined(__amd64__) || defined(__amd64)
47 #elif defined(__SSE2__)
49 #include <emmintrin.h>
50 #include <smmintrin.h>
57 void gpmp::linalg::Mtx::mtx_add(
const float *
A,
63 for (
int i = 0; i <
rows; ++i) {
66 for (; j <
cols - 3; j += 4) {
68 __m128 a = _mm_loadu_ps(&
A[i *
cols + j]);
69 __m128 b = _mm_loadu_ps(&
B[i *
cols + j]);
70 __m128 c = _mm_loadu_ps(&
C[i *
cols + j]);
75 _mm_storeu_ps(&
C[i *
cols + j], c);
79 for (; j <
cols; ++j) {
void std_mtx_add(const T *A, const T *B, T *C, int rows, int cols)
Perform matrix addition on two matrices as flat arrays.