openGPMP/__sgemm_8hpp_source.html

 /*************************************************************************

  *

  *  Project

  *                         _____ _____  __  __ _____

  *                        / ____|  __ \|  \/  |  __ \

  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |

  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/

  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |

  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|

  *      | |

  *      |_|

  *

  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.

  *

  * This software is licensed as described in the file LICENSE, which

  * you should have received as part of this distribution. The terms

  * among other details are referenced in the official documentation

  * seen here : https://akielaries.github.io/openGPMP/ along with

  * important files seen in this project.

  *

  * You may opt to use, copy, modify, merge, publish, distribute

  * and/or sell copies of the Software, and permit persons to whom

  * the Software is furnished to do so, under the terms of the

  * LICENSE file. As this is an Open Source effort, all implementations

  * must be of the same methodology.

  *

  *

  *

  * This software is distributed on an AS IS basis, WITHOUT

  * WARRANTY OF ANY KIND, either express or implied.

  *

  ************************************************************************/

 #ifndef _SGEMM_HPP

 #define _SGEMM_HPP


 /**
  * Compile-time blocking constants for the SGEMM class declared below.
  * They fix the lengths of its static float buffers:
  *   SGEMM_BUFF_A holds BLOCK_SZ_M  * BLOCK_SZ_K  floats,
  *   SGEMM_BUFF_B holds BLOCK_SZ_K  * BLOCK_SZ_N  floats,
  *   SGEMM_BUFF_C holds BLOCK_SZ_MR * BLOCK_SZ_NR floats.
  */

 /** Block extent paired with BLOCK_SZ_K to size SGEMM_BUFF_A. */
 #define BLOCK_SZ_M 384

 /** Block extent shared by the sizes of SGEMM_BUFF_A and SGEMM_BUFF_B. */
 #define BLOCK_SZ_K 384

 /** Block extent paired with BLOCK_SZ_K to size SGEMM_BUFF_B. */
 #define BLOCK_SZ_N 4096

 /**
  * Micro-panel extent: the generated docs describe pack_micro_A as packing
  * panels of BLOCK_SZ_MR rows from A. Also a factor of SGEMM_BUFF_C's length.
  */
 #define BLOCK_SZ_MR 4

 /**
  * Micro-panel extent: the generated docs describe pack_micro_B as packing
  * panels of BLOCK_SZ_NR columns from B. Also a factor of SGEMM_BUFF_C's length.
  */
 #define BLOCK_SZ_NR 4

 namespace gpmp {

 namespace linalg {


 /**
  * @class SGEMM
  * @brief Matrix multiplication on arrays of type float.
  *
  * Only declarations live here; the member functions are defined in a
  * separate translation unit. Matrices are passed as raw float pointers
  * together with a pair of increments (incRow*, incCol*).
  * NOTE(review): the increments are presumably the element strides between
  * consecutive rows and columns -- confirm against the implementation.
  */
 class SGEMM {
   public:
     /** Static storage of BLOCK_SZ_M * BLOCK_SZ_K floats. */
     static float SGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K];

     /** Static storage of BLOCK_SZ_K * BLOCK_SZ_N floats. */
     static float SGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N];

     /** Static storage of BLOCK_SZ_MR * BLOCK_SZ_NR floats. */
     static float SGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR];

     /**
      * @brief Pack one micro panel (BLOCK_SZ_MR rows by k columns) of A,
      *        without padding.
      * @param k       number of columns in the panel
      * @param A       source matrix data (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param buffer  destination receiving the packed panel
      */
     void pack_micro_A(int k, const float *A, int incRowA, int incColA,
                       float *buffer);

     /**
      * @brief Pack panels taken from A, padding where needed.
      * @param mc      first block extent of the region taken from A
      * @param kc      second block extent of the region taken from A
      * @param A       source matrix data (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param buffer  destination receiving the packed panels
      */
     void pack_buffer_A(int mc, int kc, const float *A, int incRowA,
                        int incColA, float *buffer);

     /**
      * @brief Pack one micro panel (BLOCK_SZ_NR columns by k rows) of B,
      *        without padding.
      * @param k       number of rows in the panel
      * @param B       source matrix data (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param buffer  destination receiving the packed panel
      */
     void pack_micro_B(int k, const float *B, int incRowB, int incColB,
                       float *buffer);

     /**
      * @brief Pack panels taken from B, padding where needed.
      * @param kc      first block extent of the region taken from B
      * @param nc      second block extent of the region taken from B
      * @param B       source matrix data (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param buffer  destination receiving the packed panels
      */
     void pack_buffer_B(int kc, int nc, const float *B, int incRowB,
                        int incColB, float *buffer);

     /**
      * @brief Micro kernel: multiplies a panel of A with a panel of B.
      * @param kc      shared extent of the two panels
      * @param alpha   scalar applied to the product
      * @param A       panel from A (read only)
      * @param B       panel from B (read only)
      * @param beta    scalar applied to C
      * @param C       output matrix data, modified in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void sgemm_micro_kernel(int kc, float alpha, const float *A,
                             const float *B, float beta, float *C,
                             int incRowC, int incColC);

     /**
      * @brief Compute Y += alpha * X on m-by-n float data.
      * @param m       first extent of X and Y
      * @param n       second extent of X and Y
      * @param alpha   scalar multiplier for X
      * @param X       input data (read only)
      * @param incRowX row increment for X
      * @param incColX column increment for X
      * @param Y       accumulated output, modified in place
      * @param incRowY row increment for Y
      * @param incColY column increment for Y
      */
     void sgeaxpy(int m, int n, float alpha, const float *X, int incRowX,
                  int incColX, float *Y, int incRowY, int incColY);

     /**
      * @brief Scale the elements of X by alpha, in place.
      * @param m       first extent of X
      * @param n       second extent of X
      * @param alpha   scale factor
      * @param X       data to scale, modified in place
      * @param incRowX row increment for X
      * @param incColX column increment for X
      */
     void sgescal(int m, int n, float alpha, float *X, int incRowX,
                  int incColX);

     /**
      * @brief Macro kernel: multiplies blocks of A and B.
      *
      * Takes no A or B pointer arguments.
      * NOTE(review): it presumably reads the static SGEMM_BUFF_* storage --
      * confirm against the implementation.
      *
      * @param mc      block extent (see pack_buffer_A)
      * @param nc      block extent (see pack_buffer_B)
      * @param kc      block extent shared by both blocks
      * @param alpha   scalar applied to the product
      * @param beta    scalar applied to C
      * @param C       output matrix data, modified in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void sgemm_macro_kernel(int mc, int nc, int kc, float alpha, float beta,
                             float *C, int incRowC, int incColC);

     /**
      * @brief Main entry point: computes C <- beta * C + alpha * A * B.
      * @param m       extent of the operation (paired with n and k)
      * @param n       extent of the operation
      * @param k       extent of the operation
      * @param alpha   scalar applied to A * B
      * @param A       left operand data (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param B       right operand data (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param beta    scalar applied to C
      * @param C       output matrix data, modified in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void sgemm_nn(int m, int n, int k, float alpha, const float *A,
                   int incRowA, int incColA, const float *B, int incRowB,
                   int incColB, float beta, float *C, int incRowC,
                   int incColC);
 };


 } // namespace linalg

 } // namespace gpmp


 #endif

BLOCK_SZ_M
#define BLOCK_SZ_M
Definition: _sgemm.hpp:37

BLOCK_SZ_MR
#define BLOCK_SZ_MR
Definition: _sgemm.hpp:40

BLOCK_SZ_N
#define BLOCK_SZ_N
Definition: _sgemm.hpp:39

BLOCK_SZ_NR
#define BLOCK_SZ_NR
Definition: _sgemm.hpp:41

BLOCK_SZ_K
#define BLOCK_SZ_K
Definition: _sgemm.hpp:38

gpmp::linalg::SGEMM
Class for performing matrix multiplication on float type arrays.
Definition: _sgemm.hpp:50

gpmp::linalg::SGEMM::SGEMM_BUFF_B
static float SGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
Definition: _sgemm.hpp:55

gpmp::linalg::SGEMM::pack_micro_A
void pack_micro_A(int k, const float *A, int incRowA, int incColA, float *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: sgemm_arr.cpp:50

gpmp::linalg::SGEMM::sgemm_macro_kernel
void sgemm_macro_kernel(int mc, int nc, int kc, float alpha, float beta, float *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: sgemm_arr.cpp:266

gpmp::linalg::SGEMM::pack_buffer_B
void pack_buffer_B(int kc, int nc, const float *B, int incRowB, int incColB, float *buffer)
Packs panels from B with padding if needed.
Definition: sgemm_arr.cpp:115

gpmp::linalg::SGEMM::sgeaxpy
void sgeaxpy(int m, int n, float alpha, const float *X, int incRowX, int incColX, float *Y, int incRowY, int incColY)
Computes Y += alpha*X (single-precision alpha*X + Y)
Definition: sgemm_arr.cpp:208

gpmp::linalg::SGEMM::sgemm_micro_kernel
void sgemm_micro_kernel(int kc, float alpha, const float *A, const float *B, float beta, float *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: sgemm_arr.cpp:146

gpmp::linalg::SGEMM::SGEMM_BUFF_A
static float SGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
Definition: _sgemm.hpp:53

gpmp::linalg::SGEMM::pack_buffer_A
void pack_buffer_A(int mc, int kc, const float *A, int incRowA, int incColA, float *buffer)
Packs panels from A with padding if needed.
Definition: sgemm_arr.cpp:67

gpmp::linalg::SGEMM::SGEMM_BUFF_C
static float SGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
Definition: _sgemm.hpp:57

gpmp::linalg::SGEMM::sgemm_nn
void sgemm_nn(int m, int n, int k, float alpha, const float *A, int incRowA, int incColA, const float *B, int incRowB, int incColB, float beta, float *C, int incRowC, int incColC)
Main SGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: sgemm_arr.cpp:332

gpmp::linalg::SGEMM::sgescal
void sgescal(int m, int n, float alpha, float *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: sgemm_arr.cpp:239

gpmp::linalg::SGEMM::pack_micro_B
void pack_micro_B(int k, const float *B, int incRowB, int incColB, float *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: sgemm_arr.cpp:98

gpmp
The source C++ openGPMP namespace.

python.linalg.C
list C
Definition: linalg.py:24

python.linalg.A
list A
Definition: linalg.py:22

python.linalg.B
list B
Definition: linalg.py:23