openGPMP
Open Source Mathematics Package
Public Member Functions | Static Public Attributes | List of all members
gpmp::linalg::IGEMM Class Reference

Class for performing matrix multiplication on int type arrays. More...

#include <_igemm.hpp>

Public Member Functions

void pack_micro_A (int k, const int *A, int incRowA, int incColA, int *buffer)
 Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding. More...
 
void pack_buffer_A (int mc, int kc, const int *A, int incRowA, int incColA, int *buffer)
 Packs panels from A with padding if needed. More...
 
void pack_micro_B (int k, const int *B, int incRowB, int incColB, int *buffer)
 Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding. More...
 
void pack_buffer_B (int kc, int nc, const int *B, int incRowB, int incColB, int *buffer)
 Packs panels from B with padding if needed. More...
 
void igemm_micro_kernel (int kc, int alpha, const int *A, const int *B, int beta, int *C, int incRowC, int incColC)
 Computes the micro kernel that multiplies panels from A and B. More...
 
void igeaxpy (int m, int n, int alpha, const int *X, int incRowX, int incColX, int *Y, int incRowY, int incColY)
 Computes Y += alpha*X (integer AXPY: alpha times X plus Y) More...
 
void igescal (int m, int n, int alpha, int *X, int incRowX, int incColX)
 Scales elements of X by alpha. More...
 
void igemm_macro_kernel (int mc, int nc, int kc, int alpha, int beta, int *C, int incRowC, int incColC)
 Macro kernel for the multiplication of blocks of A and B. More...
 
void igemm_nn (int m, int n, int k, int alpha, const int *A, int incRowA, int incColA, const int *B, int incRowB, int incColB, int beta, int *C, int incRowC, int incColC)
 Main IGEMM entrypoint, computes C <- beta*C + alpha*A*B. More...
 

Static Public Attributes

static int IGEMM_BUFF_A [BLOCK_SZ_M *BLOCK_SZ_K]
 
static int IGEMM_BUFF_B [BLOCK_SZ_K *BLOCK_SZ_N]
 
static int IGEMM_BUFF_C [BLOCK_SZ_MR *BLOCK_SZ_NR]
 

Detailed Description

Class for performing matrix multiplication on int type arrays.

Definition at line 50 of file _igemm.hpp.

Member Function Documentation

◆ igeaxpy()

void gpmp::linalg::IGEMM::igeaxpy ( int  m,
int  n,
int  alpha,
const int *  X,
int  incRowX,
int  incColX,
int *  Y,
int  incRowY,
int  incColY 
)

Computes Y += alpha*X (integer AXPY: alpha times X plus Y)

Parameters
m — Number of rows
n — Number of columns
alpha — Scalar alpha
X — Pointer to matrix X
incRowX — Increment between consecutive rows of X
incColX — Increment between consecutive columns of X
Y — Pointer to matrix Y
incRowY — Increment between consecutive rows of Y
incColY — Increment between consecutive columns of Y

Definition at line 208 of file igemm_arr.cpp.

216  {
217  int i, j;
218 
219  if (alpha != 1) {
220  for (j = 0; j < n; ++j) {
221  for (i = 0; i < m; ++i) {
222  Y[i * incRowY + j * incColY] +=
223  alpha * X[i * incRowX + j * incColX];
224  }
225  }
226  }
227 
228  else {
229  for (j = 0; j < n; ++j) {
230  for (i = 0; i < m; ++i) {
231  Y[i * incRowY + j * incColY] += X[i * incRowX + j * incColX];
232  }
233  }
234  }
235 }

◆ igemm_macro_kernel()

void gpmp::linalg::IGEMM::igemm_macro_kernel ( int  mc,
int  nc,
int  kc,
int  alpha,
int  beta,
int *  C,
int  incRowC,
int  incColC 
)

Macro kernel for the multiplication of blocks of A and B.

Parameters
mc — Number of rows in the block of C
nc — Number of columns in the block of C
kc — Number of columns in the blocks of A and rows of B
alpha — Scalar alpha
beta — Scalar beta
C — Pointer to the output matrix C
incRowC — Increment between consecutive rows of C
incColC — Increment between consecutive columns of C

Definition at line 265 of file igemm_arr.cpp.

272  {
273 
274  int mp = (mc + BLOCK_SZ_MR - 1) / BLOCK_SZ_MR;
275  int np = (nc + BLOCK_SZ_NR - 1) / BLOCK_SZ_NR;
276 
277  int _mr = mc % BLOCK_SZ_MR;
278  int _nr = nc % BLOCK_SZ_NR;
279 
280  int mr, nr;
281  int i, j;
282 
283  for (j = 0; j < np; ++j) {
284  nr = (j != np - 1 || _nr == 0) ? BLOCK_SZ_NR : _nr;
285 
286  for (i = 0; i < mp; ++i) {
287  mr = (i != mp - 1 || _mr == 0) ? BLOCK_SZ_MR : _mr;
288 
289  if (mr == BLOCK_SZ_MR && nr == BLOCK_SZ_NR) {
290  igemm_micro_kernel(
291  kc,
292  alpha,
293  &IGEMM_BUFF_A[i * kc * BLOCK_SZ_MR],
294  &IGEMM_BUFF_B[j * kc * BLOCK_SZ_NR],
295  beta,
296  &C[i * BLOCK_SZ_MR * incRowC + j * BLOCK_SZ_NR * incColC],
297  incRowC,
298  incColC);
299  } else {
300  igemm_micro_kernel(kc,
301  alpha,
302  &IGEMM_BUFF_A[i * kc * BLOCK_SZ_MR],
303  &IGEMM_BUFF_B[j * kc * BLOCK_SZ_NR],
304  0,
305  IGEMM_BUFF_C,
306  1,
307  BLOCK_SZ_MR);
308  igescal(
309  mr,
310  nr,
311  beta,
312  &C[i * BLOCK_SZ_MR * incRowC + j * BLOCK_SZ_NR * incColC],
313  incRowC,
314  incColC);
315  igeaxpy(
316  mr,
317  nr,
318  1.0,
319  IGEMM_BUFF_C,
320  1,
321  BLOCK_SZ_MR,
322  &C[i * BLOCK_SZ_MR * incRowC + j * BLOCK_SZ_NR * incColC],
323  incRowC,
324  incColC);
325  }
326  }
327  }
328 }
#define BLOCK_SZ_MR
Definition: _dgemm.hpp:40
#define BLOCK_SZ_NR
Definition: _dgemm.hpp:41
void igeaxpy(int m, int n, int alpha, const int *X, int incRowX, int incColX, int *Y, int incRowY, int incColY)
Computes Y += alpha*X (integer AXPY: alpha times X plus Y)
Definition: igemm_arr.cpp:208
void igemm_micro_kernel(int kc, int alpha, const int *A, const int *B, int beta, int *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: igemm_arr.cpp:146
void igescal(int m, int n, int alpha, int *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: igemm_arr.cpp:238
static int IGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
Definition: _igemm.hpp:55
static int IGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
Definition: _igemm.hpp:53
static int IGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
Definition: _igemm.hpp:57
list C
Definition: linalg.py:24

References BLOCK_SZ_MR, BLOCK_SZ_NR, and python.linalg::C.

◆ igemm_micro_kernel()

void gpmp::linalg::IGEMM::igemm_micro_kernel ( int  kc,
int  alpha,
const int *  A,
const int *  B,
int  beta,
int *  C,
int  incRowC,
int  incColC 
)

Computes the micro kernel that multiplies panels from A and B.

Parameters
kc — Number of columns of the packed panel A and rows of the packed panel B
alpha — Scalar alpha
A — Pointer to the packed panel A
B — Pointer to the packed panel B
beta — Scalar beta
C — Pointer to the output matrix C
incRowC — Increment between consecutive rows of C
incColC — Increment between consecutive columns of C

Definition at line 146 of file igemm_arr.cpp.

153  {
154  int AB[BLOCK_SZ_MR * BLOCK_SZ_NR];
155 
156  int i, j, l;
157 
158  // Compute AB = A*B
159  for (l = 0; l < BLOCK_SZ_MR * BLOCK_SZ_NR; ++l) {
160  AB[l] = 0;
161  }
162  for (l = 0; l < kc; ++l) {
163  for (j = 0; j < BLOCK_SZ_NR; ++j) {
164  for (i = 0; i < BLOCK_SZ_MR; ++i) {
165  AB[i + j * BLOCK_SZ_MR] += A[i] * B[j];
166  }
167  }
168  A += BLOCK_SZ_MR;
169  B += BLOCK_SZ_NR;
170  }
171 
172  // Update C <- beta*C
173  if (beta == 0) {
174  for (j = 0; j < BLOCK_SZ_NR; ++j) {
175  for (i = 0; i < BLOCK_SZ_MR; ++i) {
176  C[i * incRowC + j * incColC] = 0;
177  }
178  }
179  } else if (beta != 1) {
180  for (j = 0; j < BLOCK_SZ_NR; ++j) {
181  for (i = 0; i < BLOCK_SZ_MR; ++i) {
182  C[i * incRowC + j * incColC] *= beta;
183  }
184  }
185  }
186 
187  // Update C <- C + alpha*AB (note: the case alpha==0 was already treated
188  // in
189  // the above layer igemm_nn)
190  if (alpha == 1) {
191  for (j = 0; j < BLOCK_SZ_NR; ++j) {
192  for (i = 0; i < BLOCK_SZ_MR; ++i) {
193  C[i * incRowC + j * incColC] += AB[i + j * BLOCK_SZ_MR];
194  }
195  }
196  }
197 
198  else {
199  for (j = 0; j < BLOCK_SZ_NR; ++j) {
200  for (i = 0; i < BLOCK_SZ_MR; ++i) {
201  C[i * incRowC + j * incColC] += alpha * AB[i + j * BLOCK_SZ_MR];
202  }
203  }
204  }
205 }
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23

References python.linalg::A, python.linalg::B, BLOCK_SZ_MR, BLOCK_SZ_NR, and python.linalg::C.

◆ igemm_nn()

void gpmp::linalg::IGEMM::igemm_nn ( int  m,
int  n,
int  k,
int  alpha,
const int *  A,
int  incRowA,
int  incColA,
const int *  B,
int  incRowB,
int  incColB,
int  beta,
int *  C,
int  incRowC,
int  incColC 
)

Main IGEMM entrypoint, computes C <- beta*C + alpha*A*B.

Parameters
m — Number of rows of A and rows of C
n — Number of columns of B and columns of C
k — Number of columns of A and rows of B
alpha — Scalar alpha
A — Pointer to matrix A
incRowA — Increment between consecutive rows of A
incColA — Increment between consecutive columns of A
B — Pointer to matrix B
incRowB — Increment between consecutive rows of B
incColB — Increment between consecutive columns of B
beta — Scalar beta
C — Pointer to matrix C
incRowC — Increment between consecutive rows of C
incColC — Increment between consecutive columns of C

Definition at line 331 of file igemm_arr.cpp.

344  {
345  int mb = (m + BLOCK_SZ_M - 1) / BLOCK_SZ_M;
346  int nb = (n + BLOCK_SZ_N - 1) / BLOCK_SZ_N;
347  int kb = (k + BLOCK_SZ_K - 1) / BLOCK_SZ_K;
348 
349  int _mc = m % BLOCK_SZ_M;
350  int _nc = n % BLOCK_SZ_N;
351  int _kc = k % BLOCK_SZ_K;
352 
353  int mc, nc, kc;
354  int i, j, l;
355 
356  int _beta;
357 
358  if (alpha == 0 || k == 0) {
359  igescal(m, n, beta, C, incRowC, incColC);
360  return;
361  }
362 
363  for (j = 0; j < nb; ++j) {
364  nc = (j != nb - 1 || _nc == 0) ? BLOCK_SZ_N : _nc;
365 
366  for (l = 0; l < kb; ++l) {
367  kc = (l != kb - 1 || _kc == 0) ? BLOCK_SZ_K : _kc;
368  _beta = (l == 0) ? beta : 1.0;
369 
370  pack_buffer_B(
371  kc,
372  nc,
373  &B[l * BLOCK_SZ_K * incRowB + j * BLOCK_SZ_N * incColB],
374  incRowB,
375  incColB,
376  IGEMM_BUFF_B);
377 
378  for (i = 0; i < mb; ++i) {
379  mc = (i != mb - 1 || _mc == 0) ? BLOCK_SZ_M : _mc;
380 
381  pack_buffer_A(
382  mc,
383  kc,
384  &A[i * BLOCK_SZ_M * incRowA + l * BLOCK_SZ_K * incColA],
385  incRowA,
386  incColA,
387  IGEMM_BUFF_A);
388 
389  igemm_macro_kernel(
390  mc,
391  nc,
392  kc,
393  alpha,
394  _beta,
395  &C[i * BLOCK_SZ_M * incRowC + j * BLOCK_SZ_N * incColC],
396  incRowC,
397  incColC);
398  }
399  }
400  }
401 }
#define BLOCK_SZ_M
Definition: _dgemm.hpp:37
#define BLOCK_SZ_N
Definition: _dgemm.hpp:39
#define BLOCK_SZ_K
Definition: _dgemm.hpp:38
void igemm_macro_kernel(int mc, int nc, int kc, int alpha, int beta, int *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: igemm_arr.cpp:265
void pack_buffer_B(int kc, int nc, const int *B, int incRowB, int incColB, int *buffer)
Packs panels from B with padding if needed.
Definition: igemm_arr.cpp:115
void pack_buffer_A(int mc, int kc, const int *A, int incRowA, int incColA, int *buffer)
Packs panels from A with padding if needed.
Definition: igemm_arr.cpp:67

References python.linalg::A, python.linalg::B, BLOCK_SZ_K, BLOCK_SZ_M, BLOCK_SZ_N, and python.linalg::C.

◆ igescal()

void gpmp::linalg::IGEMM::igescal ( int  m,
int  n,
int  alpha,
int *  X,
int  incRowX,
int  incColX 
)

Scales elements of X by alpha.

Parameters
m — Number of rows
n — Number of columns
alpha — Scalar alpha
X — Pointer to matrix X
incRowX — Increment between consecutive rows of X
incColX — Increment between consecutive columns of X

Definition at line 238 of file igemm_arr.cpp.

243  {
244  int i, j;
245 
246  if (alpha != 0) {
247  for (j = 0; j < n; ++j) {
248  for (i = 0; i < m; ++i) {
249  X[i * incRowX + j * incColX] *= alpha;
250  }
251  }
252  }
253 
254  else {
255  for (j = 0; j < n; ++j) {
256  for (i = 0; i < m; ++i) {
257  X[i * incRowX + j * incColX] = 0;
258  }
259  }
260  }
261 }

◆ pack_buffer_A()

void gpmp::linalg::IGEMM::pack_buffer_A ( int  mc,
int  kc,
const int *  A,
int  incRowA,
int  incColA,
int *  buffer 
)

Packs panels from A with padding if needed.

Parameters
mc — Number of rows to pack
kc — Number of columns to pack
A — Pointer to the source matrix A
incRowA — Increment between consecutive rows of A
incColA — Increment between consecutive columns of A
buffer — Pointer to the buffer to store the packed panels

Definition at line 67 of file igemm_arr.cpp.

72  {
73  int mp = mc / BLOCK_SZ_MR;
74  int _mr = mc % BLOCK_SZ_MR;
75 
76  int i, j;
77 
78  for (i = 0; i < mp; ++i) {
79  pack_micro_A(kc, A, incRowA, incColA, buffer);
80  buffer += kc * BLOCK_SZ_MR;
81  A += BLOCK_SZ_MR * incRowA;
82  }
83  if (_mr > 0) {
84  for (j = 0; j < kc; ++j) {
85  for (i = 0; i < _mr; ++i) {
86  buffer[i] = A[i * incRowA];
87  }
88  for (i = _mr; i < BLOCK_SZ_MR; ++i) {
89  buffer[i] = 0;
90  }
91  buffer += BLOCK_SZ_MR;
92  A += incColA;
93  }
94  }
95 }
void pack_micro_A(int k, const int *A, int incRowA, int incColA, int *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: igemm_arr.cpp:50

References python.linalg::A, and BLOCK_SZ_MR.

◆ pack_buffer_B()

void gpmp::linalg::IGEMM::pack_buffer_B ( int  kc,
int  nc,
const int *  B,
int  incRowB,
int  incColB,
int *  buffer 
)

Packs panels from B with padding if needed.

Parameters
kc — Number of rows to pack
nc — Number of columns to pack
B — Pointer to the source matrix B
incRowB — Increment between consecutive rows of B
incColB — Increment between consecutive columns of B
buffer — Pointer to the buffer to store the packed panels

Definition at line 115 of file igemm_arr.cpp.

120  {
121  int np = nc / BLOCK_SZ_NR;
122  int _nr = nc % BLOCK_SZ_NR;
123 
124  int i, j;
125 
126  for (j = 0; j < np; ++j) {
127  pack_micro_B(kc, B, incRowB, incColB, buffer);
128  buffer += kc * BLOCK_SZ_NR;
129  B += BLOCK_SZ_NR * incColB;
130  }
131  if (_nr > 0) {
132  for (i = 0; i < kc; ++i) {
133  for (j = 0; j < _nr; ++j) {
134  buffer[j] = B[j * incColB];
135  }
136  for (j = _nr; j < BLOCK_SZ_NR; ++j) {
137  buffer[j] = 0;
138  }
139  buffer += BLOCK_SZ_NR;
140  B += incRowB;
141  }
142  }
143 }
void pack_micro_B(int k, const int *B, int incRowB, int incColB, int *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: igemm_arr.cpp:98

References python.linalg::B, and BLOCK_SZ_NR.

◆ pack_micro_A()

void gpmp::linalg::IGEMM::pack_micro_A ( int  k,
const int *  A,
int  incRowA,
int  incColA,
int *  buffer 
)

Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.

Parameters
k — Number of columns to pack
A — Pointer to the source matrix A
incRowA — Increment between consecutive rows of A
incColA — Increment between consecutive columns of A
buffer — Pointer to the buffer to store the packed micro panels

Definition at line 50 of file igemm_arr.cpp.

54  {
55  int i, j;
56 
57  for (j = 0; j < k; ++j) {
58  for (i = 0; i < BLOCK_SZ_MR; ++i) {
59  buffer[i] = A[i * incRowA];
60  }
61  buffer += BLOCK_SZ_MR;
62  A += incColA;
63  }
64 }

References python.linalg::A, and BLOCK_SZ_MR.

◆ pack_micro_B()

void gpmp::linalg::IGEMM::pack_micro_B ( int  k,
const int *  B,
int  incRowB,
int  incColB,
int *  buffer 
)

Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.

Parameters
k — Number of rows to pack
B — Pointer to the source matrix B
incRowB — Increment between consecutive rows of B
incColB — Increment between consecutive columns of B
buffer — Pointer to the buffer to store the packed micro panels

Definition at line 98 of file igemm_arr.cpp.

102  {
103  int i, j;
104 
105  for (i = 0; i < k; ++i) {
106  for (j = 0; j < BLOCK_SZ_NR; ++j) {
107  buffer[j] = B[j * incColB];
108  }
109  buffer += BLOCK_SZ_NR;
110  B += incRowB;
111  }
112 }

References python.linalg::B, and BLOCK_SZ_NR.

Member Data Documentation

◆ IGEMM_BUFF_A

int gpmp::linalg::IGEMM::IGEMM_BUFF_A
static

Buffer for storing packed micro panels of A.

Integer type GEneral Matrix-Matrix product

Definition at line 53 of file _igemm.hpp.

◆ IGEMM_BUFF_B

int gpmp::linalg::IGEMM::IGEMM_BUFF_B
static

Buffer for storing packed micro panels of B.

Definition at line 55 of file _igemm.hpp.

◆ IGEMM_BUFF_C

int gpmp::linalg::IGEMM::IGEMM_BUFF_C
static

Buffer for storing intermediate results.

Definition at line 57 of file _igemm.hpp.


The documentation for this class was generated from the following files: