openGPMP
Open Source Mathematics Package
_dgemm.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
// Include guard for this header.
// NOTE(review): `_DGEMM_HPP` starts with an underscore followed by an
// uppercase letter, a form the C++ standard reserves for the implementation.
// Consider renaming (e.g. `GPMP_DGEMM_HPP`) in the real header.
33 #ifndef _DGEMM_HPP
34 #define _DGEMM_HPP
35 
// Compile-time blocking sizes used to dimension the static DGEMM buffers
// declared in class DGEMM below.
//
// BLOCK_SZ_M * BLOCK_SZ_K  -> element count of DGEMM_BUFF_A
// BLOCK_SZ_K * BLOCK_SZ_N  -> element count of DGEMM_BUFF_B
// BLOCK_SZ_MR * BLOCK_SZ_NR -> element count of DGEMM_BUFF_C
//
// NOTE(review): with an 8-byte double these values give roughly 32 MiB for
// DGEMM_BUFF_A and 64 MiB for DGEMM_BUFF_B of static storage; confirm this
// footprint is intended on all target platforms.

// Row-direction block size; first factor of DGEMM_BUFF_A's extent.
37 #define BLOCK_SZ_M 2048
// Shared (inner) block size; appears in both DGEMM_BUFF_A and DGEMM_BUFF_B.
38 #define BLOCK_SZ_K 2048
// Column-direction block size; second factor of DGEMM_BUFF_B's extent.
39 #define BLOCK_SZ_N 4096
// Number of rows in a micro panel packed from A (see pack_micro_A).
40 #define BLOCK_SZ_MR 4
// Number of columns in a micro panel packed from B (see pack_micro_B).
41 #define BLOCK_SZ_NR 4
43 namespace gpmp {
44 namespace linalg {
45 
/// Class for performing matrix multiplication on double type arrays.
///
/// Declares the packing routines, micro/macro kernels and the `dgemm_nn`
/// entry point, plus three statically sized, 32-byte-aligned work buffers.
/// Only declarations are visible here; the definitions live in a separate
/// translation unit.
///
/// NOTE(review): the `incRow*` / `incCol*` parameters are presumably the
/// element strides between consecutive rows / columns of the respective
/// matrix -- confirm against the implementation.
50 class DGEMM {
51  public:
 /// Static buffer of BLOCK_SZ_M * BLOCK_SZ_K doubles, aligned to 32 bytes.
 /// NOTE(review): presumably holds the packed block of A -- confirm.
53  static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K]
54  __attribute__((aligned(32)));
 /// Static buffer of BLOCK_SZ_K * BLOCK_SZ_N doubles, aligned to 32 bytes.
 /// NOTE(review): presumably holds the packed block of B -- confirm.
56  static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N]
57  __attribute__((aligned(32)));
 /// Static buffer of BLOCK_SZ_MR * BLOCK_SZ_NR doubles, aligned to 32 bytes.
 /// NOTE(review): presumably a scratch tile for partial results of C --
 /// confirm.
59  static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR]
60  __attribute__((aligned(32)));
61 
 /// Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
 /// without padding.
 ///
 /// @param k       number of columns to pack
 /// @param A       source matrix data (read-only)
 /// @param incRowA row increment for A
 /// @param incColA column increment for A
 /// @param buffer  destination for the packed panel
72  void pack_micro_A(int k,
73  const double *A,
74  int incRowA,
75  int incColA,
76  double *buffer);
77 
 /// Packs panels from A with padding if needed.
 ///
 /// @param mc      NOTE(review): presumably the row count of the block -- confirm
 /// @param kc      NOTE(review): presumably the column count of the block -- confirm
 /// @param A       source matrix data (read-only)
 /// @param incRowA row increment for A
 /// @param incColA column increment for A
 /// @param buffer  destination for the packed panels
88  void pack_buffer_A(int mc,
89  int kc,
90  const double *A,
91  int incRowA,
92  int incColA,
93  double *buffer);
94 
 /// Packs micro panels of size BLOCK_SZ_NR columns by k rows from B
 /// without padding.
 ///
 /// @param k       number of rows to pack
 /// @param B       source matrix data (read-only)
 /// @param incRowB row increment for B
 /// @param incColB column increment for B
 /// @param buffer  destination for the packed panel
105  void pack_micro_B(int k,
106  const double *B,
107  int incRowB,
108  int incColB,
109  double *buffer);
110 
 /// Packs panels from B with padding if needed.
 ///
 /// @param kc      NOTE(review): presumably the row count of the block -- confirm
 /// @param nc      NOTE(review): presumably the column count of the block -- confirm
 /// @param B       source matrix data (read-only)
 /// @param incRowB row increment for B
 /// @param incColB column increment for B
 /// @param buffer  destination for the packed panels
121  void pack_buffer_B(int kc,
122  int nc,
123  const double *B,
124  int incRowB,
125  int incColB,
126  double *buffer);
127 
 /// Computes the micro kernel that multiplies panels from A and B.
 ///
 /// @param kc      NOTE(review): presumably the shared panel length -- confirm
 /// @param alpha   scalar factor
 /// @param A       panel from A (read-only)
 /// @param B       panel from B (read-only)
 /// @param beta    scalar factor
 /// @param C       output tile, written in place
 /// @param incRowC row increment for C
 /// @param incColC column increment for C
140  void dgemm_micro_kernel(int kc,
141  double alpha,
142  const double *A,
143  const double *B,
144  double beta,
145  double *C,
146  int incRowC,
147  int incColC);
148 
 /// Overload of the micro kernel taking `long` sizes/increments and two
 /// additional read-only pointers, `nextA` and `nextB`.
 ///
 /// NOTE(review): `nextA` / `nextB` look like pointers to the panels that
 /// will be processed next (e.g. for prefetching) -- confirm against the
 /// implementation; nothing in this header establishes their use.
171  void dgemm_micro_kernel(long kc,
172  double alpha,
173  const double *A,
174  const double *B,
175  double beta,
176  double *C,
177  long incRowC,
178  long incColC,
179  const double *nextA,
180  const double *nextB);
 /// Computes Y += alpha*X (double precision AX + Y).
 ///
 /// @param m       NOTE(review): presumably the row count of X and Y -- confirm
 /// @param n       NOTE(review): presumably the column count of X and Y -- confirm
 /// @param alpha   scalar applied to X
 /// @param X       input matrix data (read-only)
 /// @param incRowX row increment for X
 /// @param incColX column increment for X
 /// @param Y       matrix data updated in place
 /// @param incRowY row increment for Y
 /// @param incColY column increment for Y
194  void dgeaxpy(int m,
195  int n,
196  double alpha,
197  const double *X,
198  int incRowX,
199  int incColX,
200  double *Y,
201  int incRowY,
202  int incColY);
203 
 /// Scales elements of X by alpha.
 ///
 /// @param m       NOTE(review): presumably the row count of X -- confirm
 /// @param n       NOTE(review): presumably the column count of X -- confirm
 /// @param alpha   scale factor
 /// @param X       matrix data scaled in place
 /// @param incRowX row increment for X
 /// @param incColX column increment for X
214  void
215  dgescal(int m, int n, double alpha, double *X, int incRowX, int incColX);
216 
 /// Macro kernel for the multiplication of blocks of A and B.
 ///
 /// Takes no A or B pointers.
 /// NOTE(review): presumably reads the packed data from DGEMM_BUFF_A and
 /// DGEMM_BUFF_B -- confirm.
 ///
 /// @param mc      NOTE(review): presumably block row count -- confirm
 /// @param nc      NOTE(review): presumably block column count -- confirm
 /// @param kc      NOTE(review): presumably shared block dimension -- confirm
 /// @param alpha   scalar factor
 /// @param beta    scalar factor
 /// @param C       output matrix data, written in place
 /// @param incRowC row increment for C
 /// @param incColC column increment for C
229  void dgemm_macro_kernel(int mc,
230  int nc,
231  int kc,
232  double alpha,
233  double beta,
234  double *C,
235  int incRowC,
236  int incColC);
237 
 /// Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B.
 ///
 /// @param m       NOTE(review): presumably rows of A and C -- confirm
 /// @param n       NOTE(review): presumably columns of B and C -- confirm
 /// @param k       NOTE(review): presumably columns of A / rows of B -- confirm
 /// @param alpha   scalar applied to the product A*B
 /// @param A       left operand data (read-only)
 /// @param incRowA row increment for A
 /// @param incColA column increment for A
 /// @param B       right operand data (read-only)
 /// @param incRowB row increment for B
 /// @param incColB column increment for B
 /// @param beta    scalar applied to the existing contents of C
 /// @param C       result matrix data, updated in place
 /// @param incRowC row increment for C
 /// @param incColC column increment for C
256  void dgemm_nn(int m,
257  int n,
258  int k,
259  double alpha,
260  const double *A,
261  int incRowA,
262  int incColA,
263  const double *B,
264  int incRowB,
265  int incColB,
266  double beta,
267  double *C,
268  int incRowC,
269  int incColC);
270 };
271 
272 } // namespace linalg
273 } // namespace gpmp
274 
275 #endif
#define BLOCK_SZ_M
Definition: _dgemm.hpp:37
#define BLOCK_SZ_MR
Definition: _dgemm.hpp:40
#define BLOCK_SZ_N
Definition: _dgemm.hpp:39
#define BLOCK_SZ_NR
Definition: _dgemm.hpp:41
#define BLOCK_SZ_K
Definition: _dgemm.hpp:38
Class for performing matrix multiplication on double type arrays.
Definition: _dgemm.hpp:50
void dgemm_micro_kernel(int kc, double alpha, const double *A, const double *B, double beta, double *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
void pack_buffer_B(int kc, int nc, const double *B, int incRowB, int incColB, double *buffer)
Packs panels from B with padding if needed.
Definition: dgemm_arr.cpp:186
void pack_micro_A(int k, const double *A, int incRowA, int incColA, double *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: dgemm_arr.cpp:121
static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N] __attribute__((aligned(32)))
static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] __attribute__((aligned(32)))
void dgeaxpy(int m, int n, double alpha, const double *X, int incRowX, int incColX, double *Y, int incRowY, int incColY)
Computes Y += alpha*X (double-precision "alpha times X plus Y" update).
Definition: dgemm_arr.cpp:217
void pack_micro_B(int k, const double *B, int incRowB, int incColB, double *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: dgemm_arr.cpp:169
void dgescal(int m, int n, double alpha, double *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: dgemm_arr.cpp:248
void dgemm_macro_kernel(int mc, int nc, int kc, double alpha, double beta, double *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: dgemm_arr.cpp:275
void pack_buffer_A(int mc, int kc, const double *A, int incRowA, int incColA, double *buffer)
Packs panels from A with padding if needed.
Definition: dgemm_arr.cpp:138
static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K] __attribute__((aligned(32)))
void dgemm_nn(int m, int n, int k, double alpha, const double *A, int incRowA, int incColA, const double *B, int incRowB, int incColB, double beta, double *C, int incRowC, int incColC)
Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: dgemm_arr.cpp:381
The source C++ openGPMP namespace.
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23