openGPMP
Open Source Mathematics Package
_igemm.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
// Include guard: the declarations below are skipped when _IGEMM_HPP is already
// defined, so the header can be included more than once per translation unit.
// NOTE(review): an identifier that begins with an underscore followed by an
// uppercase letter is reserved for the implementation in C++; consider a
// non-reserved guard name (e.g. IGEMM_HPP) after confirming that nothing else
// in the project tests this macro.
33 #ifndef _IGEMM_HPP
34 #define _IGEMM_HPP
35 
// Blocking constants for the integer GEMM routines declared in this header.
// According to the cross-reference index of this listing they size the static
// work buffers:
//   IGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K]
//   IGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N]
//   IGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR]
// NOTE(review): M/K/N presumably bound the rows of an A block, the shared
// dimension and the columns of a B block respectively -- confirm against
// igemm_arr.cpp.
37 #define BLOCK_SZ_M 384
38 #define BLOCK_SZ_K 384
39 #define BLOCK_SZ_N 4096
// Micro panel dimensions: pack_micro_A packs panels of BLOCK_SZ_MR rows and
// pack_micro_B packs panels of BLOCK_SZ_NR columns.
40 #define BLOCK_SZ_MR 4
41 #define BLOCK_SZ_NR 4
/** The source C++ openGPMP namespace. */
43 namespace gpmp {
/** Nested namespace that holds the IGEMM class declared below. */
44 namespace linalg {
45 
/**
 * @brief Class for performing matrix multiplication on int type arrays.
 *
 * Only declarations appear here; the cross-reference index of this listing
 * places the definitions in igemm_arr.cpp.
 *
 * NOTE(review): the incRow... / incCol... parameters used throughout are
 * presumably the row and column strides (in elements) of the matching
 * matrix -- confirm against igemm_arr.cpp.
 */
50 class IGEMM {
51  public:
/*
 * NOTE(review): listing lines 52-57 are not visible in this view. The
 * cross-reference index attributes three static int buffers to them:
 * IGEMM_BUFF_A (line 53), IGEMM_BUFF_B (line 55) and IGEMM_BUFF_C (line 57).
 */
58 
/**
 * @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
 *        without padding.
 *
 * @param k       number of columns in the panel
 * @param A       source data (read-only)
 * @param incRowA presumably the row stride of A -- TODO confirm
 * @param incColA presumably the column stride of A -- TODO confirm
 * @param buffer  destination for the packed panel
 */
69  void
70  pack_micro_A(int k, const int *A, int incRowA, int incColA, int *buffer);
71 
/**
 * @brief Packs panels from A with padding if needed.
 *
 * @param mc      presumably the number of rows of the A block -- TODO confirm
 * @param kc      presumably the number of columns of the A block -- TODO confirm
 * @param A       source data (read-only)
 * @param incRowA presumably the row stride of A -- TODO confirm
 * @param incColA presumably the column stride of A -- TODO confirm
 * @param buffer  destination for the packed panels
 */
82  void pack_buffer_A(int mc,
83  int kc,
84  const int *A,
85  int incRowA,
86  int incColA,
87  int *buffer);
88 
/**
 * @brief Packs micro panels of size BLOCK_SZ_NR columns by k rows from B
 *        without padding.
 *
 * @param k       number of rows in the panel
 * @param B       source data (read-only)
 * @param incRowB presumably the row stride of B -- TODO confirm
 * @param incColB presumably the column stride of B -- TODO confirm
 * @param buffer  destination for the packed panel
 */
99  void
100  pack_micro_B(int k, const int *B, int incRowB, int incColB, int *buffer);
101 
/**
 * @brief Packs panels from B with padding if needed.
 *
 * @param kc      presumably the number of rows of the B block -- TODO confirm
 * @param nc      presumably the number of columns of the B block -- TODO confirm
 * @param B       source data (read-only)
 * @param incRowB presumably the row stride of B -- TODO confirm
 * @param incColB presumably the column stride of B -- TODO confirm
 * @param buffer  destination for the packed panels
 */
112  void pack_buffer_B(int kc,
113  int nc,
114  const int *B,
115  int incRowB,
116  int incColB,
117  int *buffer);
118 
/**
 * @brief Computes the micro kernel that multiplies panels from A and B.
 *
 * NOTE(review): alpha and beta presumably play the same roles as in
 * igemm_nn (C <- beta*C + alpha*A*B), and A / B are presumably the packed
 * micro panels -- confirm against igemm_arr.cpp.
 *
 * @param kc      presumably the shared (inner) dimension -- TODO confirm
 * @param alpha   integer scalar
 * @param A       panel from A (read-only)
 * @param B       panel from B (read-only)
 * @param beta    integer scalar
 * @param C       output data, written through a non-const pointer
 * @param incRowC presumably the row stride of C -- TODO confirm
 * @param incColC presumably the column stride of C -- TODO confirm
 */
131  void igemm_micro_kernel(int kc,
132  int alpha,
133  const int *A,
134  const int *B,
135  int beta,
136  int *C,
137  int incRowC,
138  int incColC);
139 
/**
 * @brief Computes Y += alpha*X (int precision AX + Y).
 *
 * @param m       presumably the number of rows -- TODO confirm
 * @param n       presumably the number of columns -- TODO confirm
 * @param alpha   integer scalar applied to X
 * @param X       input data (read-only)
 * @param incRowX presumably the row stride of X -- TODO confirm
 * @param incColX presumably the column stride of X -- TODO confirm
 * @param Y       data updated in place
 * @param incRowY presumably the row stride of Y -- TODO confirm
 * @param incColY presumably the column stride of Y -- TODO confirm
 */
153  void igeaxpy(int m,
154  int n,
155  int alpha,
156  const int *X,
157  int incRowX,
158  int incColX,
159  int *Y,
160  int incRowY,
161  int incColY);
162 
/**
 * @brief Scales elements of X by alpha.
 *
 * @param m       presumably the number of rows -- TODO confirm
 * @param n       presumably the number of columns -- TODO confirm
 * @param alpha   integer scale factor
 * @param X       data scaled in place
 * @param incRowX presumably the row stride of X -- TODO confirm
 * @param incColX presumably the column stride of X -- TODO confirm
 */
173  void igescal(int m, int n, int alpha, int *X, int incRowX, int incColX);
174 
/**
 * @brief Macro kernel for the multiplication of blocks of A and B.
 *
 * NOTE(review): no A or B pointer is passed in, so the blocks presumably
 * come from the packed static buffers (IGEMM_BUFF_A / IGEMM_BUFF_B) --
 * confirm against igemm_arr.cpp.
 *
 * @param mc      presumably the number of rows of the block -- TODO confirm
 * @param nc      presumably the number of columns of the block -- TODO confirm
 * @param kc      presumably the shared (inner) dimension -- TODO confirm
 * @param alpha   integer scalar
 * @param beta    integer scalar
 * @param C       output data, written through a non-const pointer
 * @param incRowC presumably the row stride of C -- TODO confirm
 * @param incColC presumably the column stride of C -- TODO confirm
 */
187  void igemm_macro_kernel(int mc,
188  int nc,
189  int kc,
190  int alpha,
191  int beta,
192  int *C,
193  int incRowC,
194  int incColC);
195 
/**
 * @brief Main IGEMM entrypoint, computes C <- beta*C + alpha*A*B.
 *
 * @param m       presumably the number of rows of A and C -- TODO confirm
 * @param n       presumably the number of columns of B and C -- TODO confirm
 * @param k       presumably the shared dimension of A and B -- TODO confirm
 * @param alpha   integer scalar applied to A*B
 * @param A       left operand (read-only)
 * @param incRowA presumably the row stride of A -- TODO confirm
 * @param incColA presumably the column stride of A -- TODO confirm
 * @param B       right operand (read-only)
 * @param incRowB presumably the row stride of B -- TODO confirm
 * @param incColB presumably the column stride of B -- TODO confirm
 * @param beta    integer scalar applied to C
 * @param C       result data, updated in place
 * @param incRowC presumably the row stride of C -- TODO confirm
 * @param incColC presumably the column stride of C -- TODO confirm
 */
214  void igemm_nn(int m,
215  int n,
216  int k,
217  int alpha,
218  const int *A,
219  int incRowA,
220  int incColA,
221  const int *B,
222  int incRowB,
223  int incColB,
224  int beta,
225  int *C,
226  int incRowC,
227  int incColC);
228 };
229 
230 } // namespace linalg
231 } // namespace gpmp
232 
233 #endif
#define BLOCK_SZ_M
Definition: _igemm.hpp:37
#define BLOCK_SZ_MR
Definition: _igemm.hpp:40
#define BLOCK_SZ_N
Definition: _igemm.hpp:39
#define BLOCK_SZ_NR
Definition: _igemm.hpp:41
#define BLOCK_SZ_K
Definition: _igemm.hpp:38
Class for performing matrix multiplication on int type arrays.
Definition: _igemm.hpp:50
void igemm_macro_kernel(int mc, int nc, int kc, int alpha, int beta, int *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: igemm_arr.cpp:265
void igemm_nn(int m, int n, int k, int alpha, const int *A, int incRowA, int incColA, const int *B, int incRowB, int incColB, int beta, int *C, int incRowC, int incColC)
Main IGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: igemm_arr.cpp:331
void pack_buffer_B(int kc, int nc, const int *B, int incRowB, int incColB, int *buffer)
Packs panels from B with padding if needed.
Definition: igemm_arr.cpp:115
void igeaxpy(int m, int n, int alpha, const int *X, int incRowX, int incColX, int *Y, int incRowY, int incColY)
Computes Y += alpha*X (int precision AX + Y)
Definition: igemm_arr.cpp:208
void igemm_micro_kernel(int kc, int alpha, const int *A, const int *B, int beta, int *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: igemm_arr.cpp:146
void igescal(int m, int n, int alpha, int *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: igemm_arr.cpp:238
static int IGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
Definition: _igemm.hpp:55
void pack_micro_A(int k, const int *A, int incRowA, int incColA, int *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: igemm_arr.cpp:50
static int IGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
Definition: _igemm.hpp:53
void pack_micro_B(int k, const int *B, int incRowB, int incColB, int *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: igemm_arr.cpp:98
static int IGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
Definition: _igemm.hpp:57
void pack_buffer_A(int mc, int kc, const int *A, int incRowA, int incColA, int *buffer)
Packs panels from A with padding if needed.
Definition: igemm_arr.cpp:67
The source C++ openGPMP namespace.
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23