openGPMP/__dgemm_8hpp_source.html

 /*************************************************************************

  *

  *  Project

  *                         _____ _____  __  __ _____

  *                        / ____|  __ \|  \/  |  __ \

  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |

  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/

  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |

  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|

  *      | |

  *      |_|

  *

  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.

  *

  * This software is licensed as described in the file LICENSE, which

  * you should have received as part of this distribution. The terms

  * among other details are referenced in the official documentation

  * seen here : https://akielaries.github.io/openGPMP/ along with

  * important files seen in this project.

  *

  * You may opt to use, copy, modify, merge, publish, distribute

  * and/or sell copies of the Software, and permit persons to whom

  * the Software is furnished to do so, under the terms of the

  * LICENSE file. As this is an Open Source effort, all implementations

  * must be of the same methodology.

  *

  *

  *

  * This software is distributed on an AS IS basis, WITHOUT

  * WARRANTY OF ANY KIND, either express or implied.

  *

  ************************************************************************/

 #ifndef _DGEMM_HPP

 #define _DGEMM_HPP


 /// First factor of the DGEMM_BUFF_A extent (BLOCK_SZ_M * BLOCK_SZ_K doubles).
 /// NOTE(review): presumably the largest row count of a packed block of A - confirm.
 #define BLOCK_SZ_M 2048

 /// Shared factor of the DGEMM_BUFF_A and DGEMM_BUFF_B extents.
 /// NOTE(review): presumably the largest inner (k) dimension of a packed block - confirm.
 #define BLOCK_SZ_K 2048

 /// Second factor of the DGEMM_BUFF_B extent (BLOCK_SZ_K * BLOCK_SZ_N doubles).
 /// NOTE(review): presumably the largest column count of a packed block of B - confirm.
 #define BLOCK_SZ_N 4096

 /// Row count of a micro panel of A (see pack_micro_A) and first factor of
 /// the DGEMM_BUFF_C extent (BLOCK_SZ_MR * BLOCK_SZ_NR doubles).
 #define BLOCK_SZ_MR 4

 /// Column count of a micro panel of B (see pack_micro_B) and second factor
 /// of the DGEMM_BUFF_C extent (BLOCK_SZ_MR * BLOCK_SZ_NR doubles).
 #define BLOCK_SZ_NR 4

 namespace gpmp {
 namespace linalg {

 /**
  * @brief Class for performing matrix multiplication on double type arrays.
  *
  * Declares three statically sized scratch buffers (each requested with
  * 32-byte alignment), the packing routines, the micro/macro kernels, two
  * element-wise helpers and the dgemm_nn entry point. Only declarations live
  * in this header; the definitions are provided elsewhere.
  *
  * NOTE(review): every incRow / incCol parameter is presumably the element
  * stride between consecutive rows / columns of the matching matrix -
  * confirm against the implementation.
  */
 class DGEMM {
   public:
     /// Scratch buffer of BLOCK_SZ_M * BLOCK_SZ_K doubles.
     /// NOTE(review): presumably receives the packed panels of A - confirm.
     static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K] __attribute__((aligned(32)));

     /// Scratch buffer of BLOCK_SZ_K * BLOCK_SZ_N doubles.
     /// NOTE(review): presumably receives the packed panels of B - confirm.
     static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N] __attribute__((aligned(32)));

     /// Scratch buffer of BLOCK_SZ_MR * BLOCK_SZ_NR doubles.
     /// NOTE(review): presumably holds one micro-kernel result tile - confirm.
     static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] __attribute__((aligned(32)));

     /**
      * @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
      * without padding.
      *
      * @param k number of columns in the panel
      * @param A source data (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param buffer destination of the packed panel
      */
     void pack_micro_A(int k, const double *A, int incRowA, int incColA,
                       double *buffer);

     /**
      * @brief Packs panels from A with padding if needed.
      *
      * @param mc,kc extents of the block of A being packed
      *        (NOTE(review): presumably rows and columns - confirm)
      * @param A source data (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param buffer destination of the packed panels
      */
     void pack_buffer_A(int mc, int kc, const double *A, int incRowA,
                        int incColA, double *buffer);

     /**
      * @brief Packs micro panels of size BLOCK_SZ_NR columns by k rows from B
      * without padding.
      *
      * @param k number of rows in the panel
      * @param B source data (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param buffer destination of the packed panel
      */
     void pack_micro_B(int k, const double *B, int incRowB, int incColB,
                       double *buffer);

     /**
      * @brief Packs panels from B with padding if needed.
      *
      * @param kc,nc extents of the block of B being packed
      *        (NOTE(review): presumably rows and columns - confirm)
      * @param B source data (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param buffer destination of the packed panels
      */
     void pack_buffer_B(int kc, int nc, const double *B, int incRowB,
                        int incColB, double *buffer);

     /**
      * @brief Computes the micro kernel that multiplies panels from A and B.
      *
      * @param kc shared panel extent
      * @param alpha scalar applied to the product
      *        (NOTE(review): presumably C <- beta*C + alpha*A*B, as stated
      *        for dgemm_nn - confirm)
      * @param A panel of A (read only)
      * @param B panel of B (read only)
      * @param beta scalar applied to C
      * @param C output tile, updated in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void dgemm_micro_kernel(int kc, double alpha, const double *A,
                             const double *B, double beta, double *C,
                             int incRowC, int incColC);

     /**
      * @brief Overload of the micro kernel taking long extents/increments and
      * two additional read-only pointers.
      *
      * NOTE(review): nextA / nextB are presumably the panels that will be
      * processed next (e.g. usable as prefetch hints) - their use is not
      * visible from this header, confirm against the implementation.
      *
      * @param kc shared panel extent
      * @param alpha scalar applied to the product
      * @param A panel of A (read only)
      * @param B panel of B (read only)
      * @param beta scalar applied to C
      * @param C output tile, updated in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      * @param nextA additional read-only pointer (see note)
      * @param nextB additional read-only pointer (see note)
      */
     void dgemm_micro_kernel(long kc, double alpha, const double *A,
                             const double *B, double beta, double *C,
                             long incRowC, long incColC, const double *nextA,
                             const double *nextB);

     /**
      * @brief Computes Y += alpha*X (double precision AX + Y).
      *
      * @param m,n extents of X and Y
      *        (NOTE(review): presumably rows and columns - confirm)
      * @param alpha scalar multiplier for X
      * @param X source data (read only)
      * @param incRowX row increment for X
      * @param incColX column increment for X
      * @param Y data updated in place
      * @param incRowY row increment for Y
      * @param incColY column increment for Y
      */
     void dgeaxpy(int m, int n, double alpha, const double *X, int incRowX,
                  int incColX, double *Y, int incRowY, int incColY);

     /**
      * @brief Scales elements of X by alpha.
      *
      * @param m,n extents of X
      *        (NOTE(review): presumably rows and columns - confirm)
      * @param alpha scale factor
      * @param X data scaled in place
      * @param incRowX row increment for X
      * @param incColX column increment for X
      */
     void dgescal(int m, int n, double alpha, double *X, int incRowX,
                  int incColX);

     /**
      * @brief Macro kernel for the multiplication of blocks of A and B.
      *
      * Takes no A / B pointers.
      * NOTE(review): presumably reads the already packed data from
      * DGEMM_BUFF_A / DGEMM_BUFF_B - confirm against the implementation.
      *
      * @param mc,nc,kc extents of the blocks being multiplied
      * @param alpha scalar applied to the product
      * @param beta scalar applied to C
      * @param C output block, updated in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void dgemm_macro_kernel(int mc, int nc, int kc, double alpha, double beta,
                             double *C, int incRowC, int incColC);

     /**
      * @brief Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B.
      *
      * @param m,n,k problem extents
      *        (NOTE(review): presumably A is m x k, B is k x n and C is
      *        m x n - confirm)
      * @param alpha scalar applied to A*B
      * @param A left operand (read only)
      * @param incRowA row increment for A
      * @param incColA column increment for A
      * @param B right operand (read only)
      * @param incRowB row increment for B
      * @param incColB column increment for B
      * @param beta scalar applied to C
      * @param C result, updated in place
      * @param incRowC row increment for C
      * @param incColC column increment for C
      */
     void dgemm_nn(int m, int n, int k, double alpha, const double *A,
                   int incRowA, int incColA, const double *B, int incRowB,
                   int incColB, double beta, double *C, int incRowC,
                   int incColC);
 };

 } // namespace linalg
 } // namespace gpmp


 #endif

BLOCK_SZ_M
#define BLOCK_SZ_M
Definition: _dgemm.hpp:37

BLOCK_SZ_MR
#define BLOCK_SZ_MR
Definition: _dgemm.hpp:40

BLOCK_SZ_N
#define BLOCK_SZ_N
Definition: _dgemm.hpp:39

BLOCK_SZ_NR
#define BLOCK_SZ_NR
Definition: _dgemm.hpp:41

BLOCK_SZ_K
#define BLOCK_SZ_K
Definition: _dgemm.hpp:38

gpmp::linalg::DGEMM
Class for performing matrix multiplication on double type arrays.
Definition: _dgemm.hpp:50

gpmp::linalg::DGEMM::dgemm_micro_kernel
void dgemm_micro_kernel(int kc, double alpha, const double *A, const double *B, double beta, double *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
Definition: dgemm_kernel.cpp:38

gpmp::linalg::DGEMM::pack_buffer_B
void pack_buffer_B(int kc, int nc, const double *B, int incRowB, int incColB, double *buffer)
Packs panels from B with padding if needed.
Definition: dgemm_arr.cpp:186

gpmp::linalg::DGEMM::pack_micro_A
void pack_micro_A(int k, const double *A, int incRowA, int incColA, double *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
Definition: dgemm_arr.cpp:121

gpmp::linalg::DGEMM::DGEMM_BUFF_B
static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N] __attribute__((aligned(32)))

gpmp::linalg::DGEMM::DGEMM_BUFF_C
static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] __attribute__((aligned(32)))

gpmp::linalg::DGEMM::dgeaxpy
void dgeaxpy(int m, int n, double alpha, const double *X, int incRowX, int incColX, double *Y, int incRowY, int incColY)
Computes Y += alpha*X (double precision AX + Y)
Definition: dgemm_arr.cpp:217

gpmp::linalg::DGEMM::pack_micro_B
void pack_micro_B(int k, const double *B, int incRowB, int incColB, double *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
Definition: dgemm_arr.cpp:169

gpmp::linalg::DGEMM::dgescal
void dgescal(int m, int n, double alpha, double *X, int incRowX, int incColX)
Scales elements of X by alpha.
Definition: dgemm_arr.cpp:248

gpmp::linalg::DGEMM::dgemm_macro_kernel
void dgemm_macro_kernel(int mc, int nc, int kc, double alpha, double beta, double *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
Definition: dgemm_arr.cpp:275

gpmp::linalg::DGEMM::pack_buffer_A
void pack_buffer_A(int mc, int kc, const double *A, int incRowA, int incColA, double *buffer)
Packs panels from A with padding if needed.
Definition: dgemm_arr.cpp:138

gpmp::linalg::DGEMM::DGEMM_BUFF_A
static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K] __attribute__((aligned(32)))

gpmp::linalg::DGEMM::dgemm_nn
void dgemm_nn(int m, int n, int k, double alpha, const double *A, int incRowA, int incColA, const double *B, int incRowB, int incColB, double beta, double *C, int incRowC, int incColC)
Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B.
Definition: dgemm_arr.cpp:381

gpmp
The source C++ openGPMP namespace.

python.linalg.C
list C
Definition: linalg.py:24

python.linalg.A
list A
Definition: linalg.py:22

python.linalg.B
list B
Definition: linalg.py:23