openGPMP
Open Source Mathematics Package
_simd_intel_mtx.cpp
Go to the documentation of this file.
1 #include <chrono>
2 #include <cstdlib>
3 #include <ctime>
4 #include <emmintrin.h>
5 #include <immintrin.h>
6 #include <iostream>
7 #include <random>
8 
const int N = 12;
// const int N = 1000;

/**
 * @brief Computes C = A * B for N x N single-precision matrices using SSE,
 *        then prints the elapsed wall-clock time to stdout.
 *
 * Each output element is the full dot product of row i of A with column j
 * of B. The k dimension is processed four floats at a time; any remainder
 * (when N is not a multiple of 4) is handled by a scalar tail loop.
 *
 * Unaligned loads/stores are used so the caller's arrays do not need any
 * particular alignment.
 *
 * @param A Left-hand operand (read only).
 * @param B Right-hand operand (read only).
 * @param C Output matrix; every element is overwritten. Must not alias A or B.
 */
void multiply_matrices(float A[N][N], float B[N][N], float C[N][N]) {
    auto start = std::chrono::high_resolution_clock::now();

    constexpr int kLanes = 4;                    // floats per __m128
    constexpr int kVecEnd = N - (N % kLanes);    // first k handled by the scalar tail

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            // Four running partial sums of the dot product, one per lane.
            __m128 acc = _mm_setzero_ps();

            for (int k = 0; k < kVecEnd; k += kLanes) {
                // Four consecutive elements of row i of A.
                const __m128 row = _mm_loadu_ps(&A[i][k]);

                // Matching four elements of column j of B. _mm_set_ps takes
                // its arguments from the highest lane down, so B[k][j] goes
                // last to land in lane 0 next to A[i][k].
                const __m128 col = _mm_set_ps(B[k + 3][j], B[k + 2][j],
                                              B[k + 1][j], B[k][j]);

                acc = _mm_add_ps(acc, _mm_mul_ps(row, col));
            }

            // Horizontal sum of the four lanes.
            float lanes[kLanes];
            _mm_storeu_ps(lanes, acc);
            float sum = lanes[0] + lanes[1] + lanes[2] + lanes[3];

            // Scalar tail for the k values that did not fill a whole vector.
            for (int k = kVecEnd; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }

            C[i][j] = sum;
        }
    }

    auto end = std::chrono::high_resolution_clock::now();

    std::chrono::duration<double, std::nano> elapsed = end - start;
    std::cout << "Elapsed time: " << elapsed.count() << " ns\n";
}
60 
61 void multiply_matrices_std(float A[N][N], float B[N][N], float C[N][N]) {
62  auto start = std::chrono::high_resolution_clock::now();
63  for (size_t i = 0; i < N; ++i) {
64  for (size_t j = 0; j < N; ++j) {
65  C[i][j] = A[i][0] * B[0][j];
66  for (size_t k = 1; k < N; ++k) {
67  C[i][j] += A[i][k] * B[k][j];
68  }
69  }
70  }
71 
72  auto end = std::chrono::high_resolution_clock::now();
73 
74  std::chrono::duration<double, std::nano> elapsed = end - start;
75  std::cout << "Elapsed time: " << elapsed.count() << " ns\n";
76 }
77 
78 int main() {
79 
80  /*
81  float A[N][N] = {{1, 2, 3, 4},
82  {5, 6, 7, 8},
83  {9, 10, 11, 12},
84  {13, 14, 15, 16}};
85 
86  float B[N][N] = {{16, 15, 14, 13},
87  {12, 11, 10, 9},
88  {8, 7, 6, 5},
89  {4, 3, 2, 1}};
90  */
91  float A[N][N] = {
92  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
93  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
94  {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
95  {37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
96  {49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60},
97  {61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72},
98  {73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84},
99  {85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96},
100  {97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108},
101  {109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
102  {121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132},
103  {133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144}};
104 
105  float B[N][N] = {
106  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
107  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
108  {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
109  {37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
110  {49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60},
111  {61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72},
112  {73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84},
113  {85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96},
114  {97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108},
115  {109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
116  {121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132},
117  {133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144}};
118 
119  float C[N][N];
120 
121  /*(std::srand(std::time(nullptr)); // seed the random number generator
122 
123  float A[N][N], B[N][N];
124 
125  // Fill the arrays with random values
126  for (int i = 0; i < N; i++) {
127  for (int j = 0; j < N; j++) {
128  A[i][j] = static_cast<float>(std::rand()) /
129  static_cast<float>(RAND_MAX) * 10.0f; B[i][j] =
130  static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX) * 10.0f;
131  }
132  }
133 
134  float C[N][N];
135  */
136 
137  multiply_matrices(A, B, C);
138 
139  // Print the result matrix
140  for (int i = 0; i < N; i++) {
141  for (int j = 0; j < N; j++) {
142  std::cout << C[i][j] << " ";
143  }
144  std::cout << std::endl;
145  }
146  std::cout << std::endl;
147 
148  float Z[N][N];
149 
151 
152  // Print the result matrix
153  for (int i = 0; i < N; i++) {
154  for (int j = 0; j < N; j++) {
155  std::cout << Z[i][j] << " ";
156  }
157  std::cout << std::endl;
158  }
159 
160  return 0;
161 }
void multiply_matrices(float A[N][N], float B[N][N], float C[N][N])
void multiply_matrices_std(float A[N][N], float B[N][N], float C[N][N])
const int N
int main()
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23