// Excerpt of the SSE-based multiplication loop (presumably the body of
// multiply_matrices -- confirm against the full file). The leading numbers are
// listing line numbers; the declarations of row, col, res and tmp and all
// closing braces are not part of this excerpt.
// Timestamp taken immediately before the loops start.
13 auto start = std::chrono::high_resolution_clock::now();
// Visit every output element C[i][j].
38 for (
int i = 0; i <
N; i++) {
39 for (
int j = 0; j <
N; j++) {
// Load four consecutive floats starting at A[i][0].
// _mm_load_ps requires the address to be 16-byte aligned.
41 row = _mm_load_ps(&
A[i][0]);
// _mm_set_ps takes its arguments from the highest lane down to the lowest,
// so lane k ends up holding B[k][j] for k = 0..3.
44 col = _mm_set_ps(
B[3][j],
B[2][j],
B[1][j],
B[0][j]);
// Lane-wise products A[i][k] * B[k][j] for k = 0..3.
47 res = _mm_mul_ps(row, col);
// Spill the four products to tmp (must also be 16-byte aligned for
// _mm_store_ps) so they can be summed with scalar additions.
50 _mm_store_ps(tmp,
res);
// NOTE(review): as visible here, only k = 0..3 contribute to C[i][j]. The
// scalar loop elsewhere in this file sums k over 0..N-1, so the two results
// agree only if N == 4 -- confirm N and whether a k-loop was elided.
51 C[i][j] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
// Timestamp taken after the loops; the difference is reported in nanoseconds.
55 auto end = std::chrono::high_resolution_clock::now();
57 std::chrono::duration<double, std::nano> elapsed = end - start;
58 std::cout <<
"Elapsed time: " << elapsed.count() <<
" ns\n";
// Excerpt of the plain scalar multiplication loop (presumably the body of
// multiply_matrices_std -- confirm against the full file). The leading numbers
// are listing line numbers; closing braces are not part of this excerpt.
// Timestamp taken immediately before the loops start.
62 auto start = std::chrono::high_resolution_clock::now();
// Visit every output element C[i][j].
63 for (
size_t i = 0; i <
N; ++i) {
64 for (
size_t j = 0; j <
N; ++j) {
// Seed the accumulator with the k = 0 term so C need not be zeroed beforehand.
65 C[i][j] =
A[i][0] *
B[0][j];
// Add the remaining terms k = 1..N-1 of the row-by-column dot product.
66 for (
size_t k = 1; k <
N; ++k) {
67 C[i][j] +=
A[i][k] *
B[k][j];
// Timestamp taken after the loops; the difference is reported in nanoseconds.
72 auto end = std::chrono::high_resolution_clock::now();
74 std::chrono::duration<double, std::nano> elapsed = end - start;
75 std::cout <<
"Elapsed time: " << elapsed.count() <<
" ns\n";
// Twelve initializer rows of twelve values each, holding 1..144 in row-major
// order. The declaration these rows belong to is not part of this excerpt
// (presumably one of the input matrices -- confirm against the full file).
92 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
93 {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
94 {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
95 {37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
96 {49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60},
97 {61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72},
98 {73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84},
99 {85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96},
100 {97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108},
101 {109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
102 {121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132},
103 {133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144}};
// A second set of twelve initializer rows with the same values 1..144 in
// row-major order. The declaration these rows belong to is not part of this
// excerpt (presumably the other input matrix -- confirm against the full file).
106 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
107 {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
108 {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
109 {37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
110 {49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60},
111 {61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72},
112 {73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84},
113 {85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96},
114 {97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108},
115 {109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
116 {121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132},
117 {133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144}};
// Excerpt that prints matrix C to std::cout, each element followed by a single
// space. Closing braces are not part of this excerpt.
140 for (
int i = 0; i <
N; i++) {
141 for (
int j = 0; j <
N; j++) {
142 std::cout <<
C[i][j] <<
" ";
// Presumably executed once per row, after the inner loop, to end the line --
// the brace that would confirm this is elided.
144 std::cout << std::endl;
// Presumably executed once after both loops, leaving a blank line after the matrix.
146 std::cout << std::endl;
// Excerpt that prints matrix Z to std::cout, each element followed by a single
// space. Closing braces are not part of this excerpt.
153 for (
int i = 0; i <
N; i++) {
154 for (
int j = 0; j <
N; j++) {
155 std::cout << Z[i][j] <<
" ";
// Presumably executed once per row, after the inner loop, to end the line --
// the brace that would confirm this is elided.
157 std::cout << std::endl;
/// Signature only; the body is not attached in this excerpt.
/// Takes three N-by-N float matrices and returns nothing.
/// NOTE(review): presumably A and B are the operands and C receives the
/// product, computed with the SSE loop seen elsewhere in this file -- confirm.
void multiply_matrices(float A[N][N], float B[N][N], float C[N][N])
/// Signature only; the body is not attached in this excerpt.
/// Takes three N-by-N float matrices and returns nothing.
/// NOTE(review): presumably A and B are the operands and C receives the
/// product, computed with the scalar triple loop seen elsewhere in this file -- confirm.
void multiply_matrices_std(float A[N][N], float B[N][N], float C[N][N])