openGPMP/__igemm_8hpp_source.html

 /*************************************************************************

  *

  *  Project

  *                         _____ _____  __  __ _____

  *                        / ____|  __ \|  \/  |  __ \

  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |

  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/

  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |

  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|

  *      | |

  *      |_|

  *

  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.

  *

  * This software is licensed as described in the file LICENSE, which

  * you should have received as part of this distribution. The terms

  * among other details are referenced in the official documentation

  * seen here : https://akielaries.github.io/openGPMP/ along with

  * important files seen in this project.

  *

  * You may opt to use, copy, modify, merge, publish, distribute

  * and/or sell copies of the Software, and permit persons to whom

  * the Software is furnished to do so, under the terms of the

  * LICENSE file. As this is an Open Source effort, all implementations

  * must be of the same methodology.

  *

  *

  *

  * This software is distributed on an AS IS basis, WITHOUT

  * WARRANTY OF ANY KIND, either express or implied.

  *

  ************************************************************************/

 #ifndef _IGEMM_HPP

 #define _IGEMM_HPP


 // Blocking parameters for the integer GEMM routines declared below.
 // BLOCK_SZ_M * BLOCK_SZ_K is the element count of IGEMM::IGEMM_BUFF_A and
 // BLOCK_SZ_K * BLOCK_SZ_N is the element count of IGEMM::IGEMM_BUFF_B.
 #define BLOCK_SZ_M 384
 #define BLOCK_SZ_K 384
 #define BLOCK_SZ_N 4096

 // Micro-panel extents; BLOCK_SZ_MR * BLOCK_SZ_NR is the element count of
 // IGEMM::IGEMM_BUFF_C.
 #define BLOCK_SZ_MR 4
 #define BLOCK_SZ_NR 4

 namespace gpmp {
 namespace linalg {

 /**
  * @brief Matrix multiplication routines operating on int arrays.
  *
  * Only declarations live here; the definitions are in a separate
  * translation unit.
  *
  * NOTE(review): the incRow* / incCol* arguments are presumably the element
  * strides between consecutive rows / columns of the corresponding matrix,
  * and m / n / k (mc / nc / kc) presumably matrix (block) dimensions --
  * confirm against the implementation.
  */
 class IGEMM {
   public:
     /** Work buffer of BLOCK_SZ_M * BLOCK_SZ_K ints (presumably packed A). */
     static int IGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K];

     /** Work buffer of BLOCK_SZ_K * BLOCK_SZ_N ints (presumably packed B). */
     static int IGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N];

     /** Work buffer of BLOCK_SZ_MR * BLOCK_SZ_NR ints. */
     static int IGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR];

     /**
      * @brief Packs a micro panel from A into buffer.
      * @param k       extent of the panel along k
      * @param A       read-only source data
      * @param incRowA see class note on strides
      * @param incColA see class note on strides
      * @param buffer  destination the packed panel is written to
      */
     void pack_micro_A(int k,
                       const int *A,
                       int incRowA,
                       int incColA,
                       int *buffer);

     /**
      * @brief Packs panels from A into buffer.
      * @param mc      presumably the number of rows of the A block
      * @param kc      presumably the number of columns of the A block
      * @param A       read-only source data
      * @param incRowA see class note on strides
      * @param incColA see class note on strides
      * @param buffer  destination the packed panels are written to
      */
     void pack_buffer_A(int mc,
                        int kc,
                        const int *A,
                        int incRowA,
                        int incColA,
                        int *buffer);

     /**
      * @brief Packs a micro panel from B into buffer.
      * @param k       extent of the panel along k
      * @param B       read-only source data
      * @param incRowB see class note on strides
      * @param incColB see class note on strides
      * @param buffer  destination the packed panel is written to
      */
     void pack_micro_B(int k,
                       const int *B,
                       int incRowB,
                       int incColB,
                       int *buffer);

     /**
      * @brief Packs panels from B into buffer.
      * @param kc      presumably the number of rows of the B block
      * @param nc      presumably the number of columns of the B block
      * @param B       read-only source data
      * @param incRowB see class note on strides
      * @param incColB see class note on strides
      * @param buffer  destination the packed panels are written to
      */
     void pack_buffer_B(int kc,
                        int nc,
                        const int *B,
                        int incRowB,
                        int incColB,
                        int *buffer);

     /**
      * @brief Micro kernel multiplying panels from A and B into C.
      * @param kc      presumably the shared (inner) panel extent
      * @param alpha   scalar applied to the A*B product (presumably)
      * @param A       read-only panel of A
      * @param B       read-only panel of B
      * @param beta    scalar applied to C (presumably)
      * @param C       output data, modified in place
      * @param incRowC see class note on strides
      * @param incColC see class note on strides
      */
     void igemm_micro_kernel(int kc,
                             int alpha,
                             const int *A,
                             const int *B,
                             int beta,
                             int *C,
                             int incRowC,
                             int incColC);

     /**
      * @brief AXPY-style update of Y from X scaled by alpha.
      * @param m       presumably the number of rows of X and Y
      * @param n       presumably the number of columns of X and Y
      * @param alpha   scalar multiplier for X
      * @param X       read-only input data
      * @param incRowX see class note on strides
      * @param incColX see class note on strides
      * @param Y       data updated in place
      * @param incRowY see class note on strides
      * @param incColY see class note on strides
      */
     void igeaxpy(int m,
                  int n,
                  int alpha,
                  const int *X,
                  int incRowX,
                  int incColX,
                  int *Y,
                  int incRowY,
                  int incColY);

     /**
      * @brief Scaling routine for X with factor alpha.
      * @param m       presumably the number of rows of X
      * @param n       presumably the number of columns of X
      * @param alpha   scale factor
      * @param X       data modified in place
      * @param incRowX see class note on strides
      * @param incColX see class note on strides
      */
     void igescal(int m,
                  int n,
                  int alpha,
                  int *X,
                  int incRowX,
                  int incColX);

     /**
      * @brief Macro kernel for the multiplication of blocks of A and B.
      *
      * NOTE(review): takes no A/B pointers, so it presumably reads the
      * operands from the static IGEMM_BUFF_* buffers -- confirm.
      *
      * @param mc      presumably the row count of the C block
      * @param nc      presumably the column count of the C block
      * @param kc      presumably the shared (inner) block extent
      * @param alpha   scalar applied to the A*B product (presumably)
      * @param beta    scalar applied to C (presumably)
      * @param C       output data, modified in place
      * @param incRowC see class note on strides
      * @param incColC see class note on strides
      */
     void igemm_macro_kernel(int mc,
                             int nc,
                             int kc,
                             int alpha,
                             int beta,
                             int *C,
                             int incRowC,
                             int incColC);

     /**
      * @brief IGEMM entry point for int matrices A, B and C.
      *
      * NOTE(review): presumably computes C <- beta*C + alpha*A*B; confirm
      * against the implementation.
      *
      * @param m       presumably rows of A and C
      * @param n       presumably columns of B and C
      * @param k       presumably columns of A / rows of B
      * @param alpha   scalar for the A*B product (presumably)
      * @param A       read-only left operand
      * @param incRowA see class note on strides
      * @param incColA see class note on strides
      * @param B       read-only right operand
      * @param incRowB see class note on strides
      * @param incColB see class note on strides
      * @param beta    scalar for C (presumably)
      * @param C       result data, modified in place
      * @param incRowC see class note on strides
      * @param incColC see class note on strides
      */
     void igemm_nn(int m,
                   int n,
                   int k,
                   int alpha,
                   const int *A,
                   int incRowA,
                   int incColA,
                   const int *B,
                   int incRowB,
                   int incColB,
                   int beta,
                   int *C,
                   int incRowC,
                   int incColC);
 };

 } // namespace linalg
 } // namespace gpmp


 #endif

BLOCK_SZ_M
#define BLOCK_SZ_M
Definition: _igemm.hpp:37

BLOCK_SZ_MR
#define BLOCK_SZ_MR
Definition: _igemm.hpp:40

BLOCK_SZ_N
#define BLOCK_SZ_N
Definition: _igemm.hpp:39

BLOCK_SZ_NR
#define BLOCK_SZ_NR
Definition: _igemm.hpp:41

BLOCK_SZ_K
#define BLOCK_SZ_K
Definition: _igemm.hpp:38

gpmp::linalg::IGEMM
Class for performing matrix multiplication on int type arrays.
Definition: _igemm.hpp:50

gpmp::linalg::IGEMM::igemm_macro_kernel
void igemm_macro_kernel(int mc, int nc, int kc, int alpha, int beta, int *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: igemm_arr.cpp:265

gpmp::linalg::IGEMM::igemm_nn
void igemm_nn(int m, int n, int k, int alpha, const int *A, int incRowA, int incColA, const int *B, int incRowB, int incColB, int beta, int *C, int incRowC, int incColC)
Main IGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: igemm_arr.cpp:331

gpmp::linalg::IGEMM::pack_buffer_B
void pack_buffer_B(int kc, int nc, const int *B, int incRowB, int incColB, int *buffer)
Packs panels from B with padding if needed.
Definition: igemm_arr.cpp:115

gpmp::linalg::IGEMM::igeaxpy
void igeaxpy(int m, int n, int alpha, const int *X, int incRowX, int incColX, int *Y, int incRowY, int incColY)
Computes Y += alpha*X (integer AXPY, i.e. alpha*X + Y).
Definition: igemm_arr.cpp:208

gpmp::linalg::IGEMM::igemm_micro_kernel
void igemm_micro_kernel(int kc, int alpha, const int *A, const int *B, int beta, int *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: igemm_arr.cpp:146

gpmp::linalg::IGEMM::igescal
void igescal(int m, int n, int alpha, int *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: igemm_arr.cpp:238

gpmp::linalg::IGEMM::IGEMM_BUFF_B
static int IGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
Definition: _igemm.hpp:55

gpmp::linalg::IGEMM::pack_micro_A
void pack_micro_A(int k, const int *A, int incRowA, int incColA, int *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: igemm_arr.cpp:50

gpmp::linalg::IGEMM::IGEMM_BUFF_A
static int IGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
Definition: _igemm.hpp:53

gpmp::linalg::IGEMM::pack_micro_B
void pack_micro_B(int k, const int *B, int incRowB, int incColB, int *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: igemm_arr.cpp:98

gpmp::linalg::IGEMM::IGEMM_BUFF_C
static int IGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
Definition: _igemm.hpp:57

gpmp::linalg::IGEMM::pack_buffer_A
void pack_buffer_A(int mc, int kc, const int *A, int incRowA, int incColA, int *buffer)
Packs panels from A with padding if needed.
Definition: igemm_arr.cpp:67

gpmp
The source C++ openGPMP namespace.

python.linalg.C
list C
Definition: linalg.py:24

python.linalg.A
list A
Definition: linalg.py:22

python.linalg.B
list B
Definition: linalg.py:23