37 #define BLOCK_SZ_M 2048
38 #define BLOCK_SZ_K 2048
39 #define BLOCK_SZ_N 4096
180 const double *nextB);
215 dgescal(
int m,
int n,
double alpha,
double *X,
int incRowX,
int incColX);
Class for performing matrix multiplication on double type arrays.
void dgemm_micro_kernel(int kc, double alpha, const double *A, const double *B, double beta, double *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
void pack_buffer_B(int kc, int nc, const double *B, int incRowB, int incColB, double *buffer)
Packs panels from B with padding if needed.
void pack_micro_A(int k, const double *A, int incRowA, int incColA, double *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N] __attribute__((aligned(32)))
static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] __attribute__((aligned(32)))
void dgeaxpy(int m, int n, double alpha, const double *X, int incRowX, int incColX, double *Y, int incRowY, int incColY)
Computes Y += alpha*X (double-precision AXPY: alpha times X, plus Y).
void pack_micro_B(int k, const double *B, int incRowB, int incColB, double *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
void dgescal(int m, int n, double alpha, double *X, int incRowX, int incColX)
Scales elements of X by alpha.
void dgemm_macro_kernel(int mc, int nc, int kc, double alpha, double beta, double *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
void pack_buffer_A(int mc, int kc, const double *A, int incRowA, int incColA, double *buffer)
Packs panels from A with padding if needed.
static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K] __attribute__((aligned(32)))
void dgemm_nn(int m, int n, int k, double alpha, const double *A, int incRowA, int incColA, const double *B, int incRowB, int incColB, double beta, double *C, int incRowC, int incColC)
Main DGEMM entry point; computes C <- beta*C + alpha*A*B.
The source C++ openGPMP namespace.