37 #define BLOCK_SZ_M 384
38 #define BLOCK_SZ_K 384
39 #define BLOCK_SZ_N 4096
70 pack_micro_A(
int k,
const int *
A,
int incRowA,
int incColA,
int *buffer);
100 pack_micro_B(
int k,
const int *
B,
int incRowB,
int incColB,
int *buffer);
173 void igescal(
int m,
int n,
int alpha,
int *X,
int incRowX,
int incColX);
Class for performing matrix multiplication on arrays of type int.
void igemm_macro_kernel(int mc, int nc, int kc, int alpha, int beta, int *C, int incRowC, int incColC)
Macro kernel for the multiplication of blocks of A and B.
void igemm_nn(int m, int n, int k, int alpha, const int *A, int incRowA, int incColA, const int *B, int incRowB, int incColB, int beta, int *C, int incRowC, int incColC)
Main IGEMM entry point; computes C <- beta*C + alpha*A*B.
void pack_buffer_B(int kc, int nc, const int *B, int incRowB, int incColB, int *buffer)
Packs panels from B with padding if needed.
void igeaxpy(int m, int n, int alpha, const int *X, int incRowX, int incColX, int *Y, int incRowY, int incColY)
Computes Y <- Y + alpha*X (integer-precision AXPY, i.e. alpha*X plus Y).
void igemm_micro_kernel(int kc, int alpha, const int *A, const int *B, int beta, int *C, int incRowC, int incColC)
Micro kernel for the multiplication of panels from A and B.
void igescal(int m, int n, int alpha, int *X, int incRowX, int incColX)
Scales elements of X by alpha.
static int IGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N]
void pack_micro_A(int k, const int *A, int incRowA, int incColA, int *buffer)
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
static int IGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K]
void pack_micro_B(int k, const int *B, int incRowB, int incColB, int *buffer)
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
static int IGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR]
void pack_buffer_A(int mc, int kc, const int *A, int incRowA, int incColA, int *buffer)
Packs panels from A with padding if needed.
The source C++ openGPMP namespace.