openGPMP
Open Source Mathematics Package
_sgemm.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 #ifndef _SGEMM_HPP
34 #define _SGEMM_HPP
35 
/* Blocking constants for the SGEMM routines declared in this header. */

/* Together with BLOCK_SZ_K, sizes the static buffer SGEMM_BUFF_A
 * (BLOCK_SZ_M * BLOCK_SZ_K floats). */
37 #define BLOCK_SZ_M 384
/* Shared dimension of the A and B buffers: SGEMM_BUFF_A holds
 * BLOCK_SZ_M * BLOCK_SZ_K floats, SGEMM_BUFF_B holds
 * BLOCK_SZ_K * BLOCK_SZ_N floats. */
38 #define BLOCK_SZ_K 384
/* Together with BLOCK_SZ_K, sizes the static buffer SGEMM_BUFF_B. */
39 #define BLOCK_SZ_N 4096
/* Number of rows in a micro panel packed from A (see pack_micro_A);
 * also the first factor of the SGEMM_BUFF_C size. */
40 #define BLOCK_SZ_MR 4
/* Number of columns in a micro panel packed from B (see pack_micro_B);
 * also the second factor of the SGEMM_BUFF_C size. */
41 #define BLOCK_SZ_NR 4
/** The source C++ openGPMP namespace. */
43 namespace gpmp {
44 namespace linalg {
45 
/**
 * @brief Class for performing matrix multiplication on float type arrays.
 *
 * All methods are declared here and defined in sgemm_arr.cpp.
 *
 * NOTE(review): the generated index for this header lists three static
 * members -- SGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K],
 * SGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N] and
 * SGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] -- at lines 53, 55 and 57 of the
 * header. They are not visible in this listing; confirm against the real
 * file before editing the class body.
 *
 * NOTE(review): throughout, the incRow / incCol arguments are presumably
 * the row and column strides (in elements) of the matching matrix --
 * confirm against the definitions.
 */
50 class SGEMM {
51  public:
58 
/**
 * @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
 * without padding.
 *
 * @param k       number of columns to pack
 * @param A       source data (read-only)
 * @param incRowA see the stride note on the class
 * @param incColA see the stride note on the class
 * @param buffer  presumably the destination of the packed panel
 */
69  void pack_micro_A(int k,
70  const float *A,
71  int incRowA,
72  int incColA,
73  float *buffer);
74 
/**
 * @brief Packs panels from A with padding if needed.
 *
 * @param mc      presumably the row count of the block of A -- confirm
 * @param kc      presumably the column count of the block of A -- confirm
 * @param A       source data (read-only)
 * @param incRowA see the stride note on the class
 * @param incColA see the stride note on the class
 * @param buffer  presumably the destination of the packed panels
 */
85  void pack_buffer_A(int mc,
86  int kc,
87  const float *A,
88  int incRowA,
89  int incColA,
90  float *buffer);
91 
/**
 * @brief Packs micro panels of size BLOCK_SZ_NR columns by k rows from B
 * without padding.
 *
 * @param k       number of rows to pack
 * @param B       source data (read-only)
 * @param incRowB see the stride note on the class
 * @param incColB see the stride note on the class
 * @param buffer  presumably the destination of the packed panel
 */
102  void pack_micro_B(int k,
103  const float *B,
104  int incRowB,
105  int incColB,
106  float *buffer);
107 
/**
 * @brief Packs panels from B with padding if needed.
 *
 * @param kc      presumably the row count of the block of B -- confirm
 * @param nc      presumably the column count of the block of B -- confirm
 * @param B       source data (read-only)
 * @param incRowB see the stride note on the class
 * @param incColB see the stride note on the class
 * @param buffer  presumably the destination of the packed panels
 */
118  void pack_buffer_B(int kc,
119  int nc,
120  const float *B,
121  int incRowB,
122  int incColB,
123  float *buffer);
124 
/**
 * @brief Computes the micro kernel that multiplies panels from A and B.
 *
 * @param kc      presumably the shared (inner) dimension of the two
 *                panels -- confirm
 * @param alpha   scalar factor; presumably applied to the A*B product as
 *                in sgemm_nn -- confirm
 * @param A       panel from A (read-only)
 * @param B       panel from B (read-only)
 * @param beta    scalar factor; presumably applied to C as in sgemm_nn --
 *                confirm
 * @param C       output data (writable)
 * @param incRowC see the stride note on the class
 * @param incColC see the stride note on the class
 */
137  void sgemm_micro_kernel(int kc,
138  float alpha,
139  const float *A,
140  const float *B,
141  float beta,
142  float *C,
143  int incRowC,
144  int incColC);
145 
/**
 * @brief Computes Y += alpha*X (float precision AX + Y).
 *
 * @param m       presumably the row count of X and Y -- confirm
 * @param n       presumably the column count of X and Y -- confirm
 * @param alpha   scalar multiplier applied to X
 * @param X       input data (read-only)
 * @param incRowX see the stride note on the class
 * @param incColX see the stride note on the class
 * @param Y       data updated in place
 * @param incRowY see the stride note on the class
 * @param incColY see the stride note on the class
 */
159  void sgeaxpy(int m,
160  int n,
161  float alpha,
162  const float *X,
163  int incRowX,
164  int incColX,
165  float *Y,
166  int incRowY,
167  int incColY);
168 
/**
 * @brief Scales elements of X by alpha.
 *
 * @param m       presumably the row count of X -- confirm
 * @param n       presumably the column count of X -- confirm
 * @param alpha   scale factor
 * @param X       data scaled in place
 * @param incRowX see the stride note on the class
 * @param incColX see the stride note on the class
 */
179  void sgescal(int m, int n, float alpha, float *X, int incRowX, int incColX);
180 
/**
 * @brief Macro kernel for the multiplication of blocks of A and B.
 *
 * Takes no A or B pointer.
 * NOTE(review): it presumably reads the packed blocks from the class's
 * static buffers listed in the generated index -- confirm in sgemm_arr.cpp.
 *
 * @param mc      presumably block dimension along the rows of C -- confirm
 * @param nc      presumably block dimension along the columns of C -- confirm
 * @param kc      presumably the shared (inner) block dimension -- confirm
 * @param alpha   scalar factor (see sgemm_nn)
 * @param beta    scalar factor (see sgemm_nn)
 * @param C       output data (writable)
 * @param incRowC see the stride note on the class
 * @param incColC see the stride note on the class
 */
193  void sgemm_macro_kernel(int mc,
194  int nc,
195  int kc,
196  float alpha,
197  float beta,
198  float *C,
199  int incRowC,
200  int incColC);
201 
/**
 * @brief Main SGEMM entrypoint, computes C <- beta*C + alpha*A*B.
 *
 * @param m       presumably rows of A and C -- confirm
 * @param n       presumably columns of B and C -- confirm
 * @param k       presumably columns of A / rows of B -- confirm
 * @param alpha   scalar applied to the product A*B
 * @param A       left operand (read-only)
 * @param incRowA see the stride note on the class
 * @param incColA see the stride note on the class
 * @param B       right operand (read-only)
 * @param incRowB see the stride note on the class
 * @param incColB see the stride note on the class
 * @param beta    scalar applied to the existing contents of C
 * @param C       result, updated in place
 * @param incRowC see the stride note on the class
 * @param incColC see the stride note on the class
 */
220  void sgemm_nn(int m,
221  int n,
222  int k,
223  float alpha,
224  const float *A,
225  int incRowA,
226  int incColA,
227  const float *B,
228  int incRowB,
229  int incColB,
230  float beta,
231  float *C,
232  int incRowC,
233  int incColC);
234 };
235 
236 } // namespace linalg
237 } // namespace gpmp
238 
239 #endif
#define BLOCK_SZ_M
Definition: _sgemm.hpp:37
#define BLOCK_SZ_MR
Definition: _sgemm.hpp:40
#define BLOCK_SZ_N
Definition: _sgemm.hpp:39
#define BLOCK_SZ_NR
Definition: _sgemm.hpp:41
#define BLOCK_SZ_K
Definition: _sgemm.hpp:38
Class for performing matrix multiplication on float type arrays.
Definition: _sgemm.hpp:50
static float SGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
Definition: _sgemm.hpp:55
void pack_micro_A(int k, const float *A, int incRowA, int incColA, float *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: sgemm_arr.cpp:50
void sgemm_macro_kernel(int mc, int nc, int kc, float alpha, float beta, float *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: sgemm_arr.cpp:266
void pack_buffer_B(int kc, int nc, const float *B, int incRowB, int incColB, float *buffer)
Packs panels from B with padding if needed.
Definition: sgemm_arr.cpp:115
void sgeaxpy(int m, int n, float alpha, const float *X, int incRowX, int incColX, float *Y, int incRowY, int incColY)
Computes Y += alpha*X (float precision AX + Y)
Definition: sgemm_arr.cpp:208
void sgemm_micro_kernel(int kc, float alpha, const float *A, const float *B, float beta, float *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: sgemm_arr.cpp:146
static float SGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
Definition: _sgemm.hpp:53
void pack_buffer_A(int mc, int kc, const float *A, int incRowA, int incColA, float *buffer)
Packs panels from A with padding if needed.
Definition: sgemm_arr.cpp:67
static float SGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
Definition: _sgemm.hpp:57
void sgemm_nn(int m, int n, int k, float alpha, const float *A, int incRowA, int incColA, const float *B, int incRowB, int incColB, float beta, float *C, int incRowC, int incColC)
Main SGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: sgemm_arr.cpp:332
void sgescal(int m, int n, float alpha, float *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: sgemm_arr.cpp:239
void pack_micro_B(int k, const float *B, int incRowB, int incColB, float *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: sgemm_arr.cpp:98
The source C++ openGPMP namespace.
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23