openGPMP
Open Source Mathematics Package
|
Class for performing matrix multiplication on double type arrays. More...
#include <_dgemm.hpp>
Public Member Functions | |
void | pack_micro_A (int k, const double *A, int incRowA, int incColA, double *buffer) |
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding. More... | |
void | pack_buffer_A (int mc, int kc, const double *A, int incRowA, int incColA, double *buffer) |
Packs panels from A with padding if needed. More... | |
void | pack_micro_B (int k, const double *B, int incRowB, int incColB, double *buffer) |
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding. More... | |
void | pack_buffer_B (int kc, int nc, const double *B, int incRowB, int incColB, double *buffer) |
Packs panels from B with padding if needed. More... | |
void | dgemm_micro_kernel (int kc, double alpha, const double *A, const double *B, double beta, double *C, int incRowC, int incColC) |
Computes the micro kernel that multiplies panels from A and B. More... | |
void | dgemm_micro_kernel (long kc, double alpha, const double *A, const double *B, double beta, double *C, long incRowC, long incColC, const double *nextA, const double *nextB) |
Perform a micro-kernel operation for double-precision matrix-matrix multiplication (DGEMM) More... | |
void | dgeaxpy (int m, int n, double alpha, const double *X, int incRowX, int incColX, double *Y, int incRowY, int incColY) |
Computes Y += alpha*X (double precision AX + Y) More... | |
void | dgescal (int m, int n, double alpha, double *X, int incRowX, int incColX) |
Scales elements of X by alpha. More... | |
void | dgemm_macro_kernel (int mc, int nc, int kc, double alpha, double beta, double *C, int incRowC, int incColC) |
Macro kernel for the multiplication of blocks of A and B. More... | |
void | dgemm_nn (int m, int n, int k, double alpha, const double *A, int incRowA, int incColA, const double *B, int incRowB, int incColB, double beta, double *C, int incRowC, int incColC) |
Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B. More... | |
Static Public Member Functions | |
static double DGEMM_BUFF_A[BLOCK_SZ_M *BLOCK_SZ_K] | __attribute__ ((aligned(32))) |
static double DGEMM_BUFF_B[BLOCK_SZ_K *BLOCK_SZ_N] | __attribute__ ((aligned(32))) |
static double DGEMM_BUFF_C[BLOCK_SZ_MR *BLOCK_SZ_NR] | __attribute__ ((aligned(32))) |
Class for performing matrix multiplication on double type arrays.
Definition at line 50 of file _dgemm.hpp.
|
static |
Buffer for storing packed micro panels of A
Buffer for storing packed micro panels of B
|
static |
Buffer for storing intermediate results
|
static |
void gpmp::linalg::DGEMM::dgeaxpy | ( | int | m, |
int | n, | ||
double | alpha, | ||
const double * | X, | ||
int | incRowX, | ||
int | incColX, | ||
double * | Y, | ||
int | incRowY, | ||
int | incColY | ||
) |
Computes Y += alpha*X (double precision AX + Y)
m | Number of rows |
n | Number of columns |
alpha | Scalar alpha |
X | Pointer to matrix X |
incRowX | Increment between consecutive rows of X |
incColX | Increment between consecutive columns of X |
Y | Pointer to matrix Y |
incRowY | Increment between consecutive rows of Y |
incColY | Increment between consecutive columns of Y |
Definition at line 217 of file dgemm_arr.cpp.
void gpmp::linalg::DGEMM::dgemm_macro_kernel | ( | int | mc, |
int | nc, | ||
int | kc, | ||
double | alpha, | ||
double | beta, | ||
double * | C, | ||
int | incRowC, | ||
int | incColC | ||
) |
Macro kernel for the multiplication of blocks of A and B.
mc | Number of rows in the block of C |
nc | Number of columns in the block of C |
kc | Number of columns in the blocks of A and rows of B |
alpha | Scalar alpha |
beta | Scalar beta |
C | Pointer to the output matrix C |
incRowC | Increment between consecutive rows of C |
incColC | Increment between consecutive columns of C |
Definition at line 275 of file dgemm_arr.cpp.
References BLOCK_SZ_MR, BLOCK_SZ_NR, and python.linalg::C.
void gpmp::linalg::DGEMM::dgemm_micro_kernel | ( | int | kc, |
double | alpha, | ||
const double * | A, | ||
const double * | B, | ||
double | beta, | ||
double * | C, | ||
int | incRowC, | ||
int | incColC | ||
) |
Computes the micro kernel that multiplies panels from A and B.
kc | Number of columns in the packed panel of A and rows in the packed panel of B |
alpha | Scalar alpha |
A | Pointer to the packed panel A |
B | Pointer to the packed panel B |
beta | Scalar beta |
C | Pointer to the output matrix C |
incRowC | Increment between consecutive rows of C |
incColC | Increment between consecutive columns of C |
Double precision GEneral Matrix-Matrix product kernel without acceleration
Definition at line 38 of file dgemm_kernel.cpp.
References python.linalg::A, python.linalg::B, BLOCK_SZ_MR, BLOCK_SZ_NR, and python.linalg::C.
void gpmp::linalg::DGEMM::dgemm_micro_kernel | ( | long | kc, |
double | alpha, | ||
const double * | A, | ||
const double * | B, | ||
double | beta, | ||
double * | C, | ||
long | incRowC, | ||
long | incColC, | ||
const double * | nextA, | ||
const double * | nextB | ||
) |
Perform a micro-kernel operation for double-precision matrix-matrix multiplication (DGEMM)
This function implements a micro-kernel operation for DGEMM, which is used as a building block in larger DGEMM routines. The micro-kernel performs a basic matrix multiplication operation with optimizations tailored for SSE (Streaming SIMD Extensions) on the x86 architecture.
kc | The size of the inner dimension of the matrices A and B |
alpha | Scaling factor for the matrix multiplication |
A | Pointer to the first input matrix A in row-major order |
B | Pointer to the second input matrix B in column-major order |
beta | Scaling factor for the matrix C |
C | Pointer to the output matrix C in row-major order |
incRowC | Increment for moving between rows of the matrix C |
incColC | Increment for moving between columns of the matrix C |
nextA | Pointer to the next block of matrix A (unused in this micro-kernel) |
nextB | Pointer to the next block of matrix B (unused in this micro-kernel) |
Double precision GEneral Matrix-Matrix product
Definition at line 88 of file dgemm_arr.cpp.
References python.linalg::A, python.linalg::B, and python.linalg::C.
void gpmp::linalg::DGEMM::dgemm_nn | ( | int | m, |
int | n, | ||
int | k, | ||
double | alpha, | ||
const double * | A, | ||
int | incRowA, | ||
int | incColA, | ||
const double * | B, | ||
int | incRowB, | ||
int | incColB, | ||
double | beta, | ||
double * | C, | ||
int | incRowC, | ||
int | incColC | ||
) |
Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B.
m | Number of rows of A and rows of C |
n | Number of columns of B and columns of C |
k | Number of columns of A and rows of B |
alpha | Scalar alpha |
A | Pointer to matrix A |
incRowA | Increment between consecutive rows of A |
incColA | Increment between consecutive columns of A |
B | Pointer to matrix B |
incRowB | Increment between consecutive rows of B |
incColB | Increment between consecutive columns of B |
beta | Scalar beta |
C | Pointer to matrix C |
incRowC | Increment between consecutive rows of C |
incColC | Increment between consecutive columns of C |
Definition at line 381 of file dgemm_arr.cpp.
References python.linalg::A, python.linalg::B, BLOCK_SZ_K, BLOCK_SZ_M, BLOCK_SZ_N, and python.linalg::C.
void gpmp::linalg::DGEMM::dgescal | ( | int | m, |
int | n, | ||
double | alpha, | ||
double * | X, | ||
int | incRowX, | ||
int | incColX | ||
) |
Scales elements of X by alpha.
m | Number of rows |
n | Number of columns |
alpha | Scalar alpha |
X | Pointer to matrix X |
incRowX | Increment between consecutive rows of X |
incColX | Increment between consecutive columns of X |
Definition at line 248 of file dgemm_arr.cpp.
void gpmp::linalg::DGEMM::pack_buffer_A | ( | int | mc, |
int | kc, | ||
const double * | A, | ||
int | incRowA, | ||
int | incColA, | ||
double * | buffer | ||
) |
Packs panels from A with padding if needed.
mc | Number of rows to pack |
kc | Number of columns to pack |
A | Pointer to the source matrix A |
incRowA | Increment between consecutive rows of A |
incColA | Increment between consecutive columns of A |
buffer | Pointer to the buffer to store the packed panels |
Definition at line 138 of file dgemm_arr.cpp.
References python.linalg::A and BLOCK_SZ_MR.
void gpmp::linalg::DGEMM::pack_buffer_B | ( | int | kc, |
int | nc, | ||
const double * | B, | ||
int | incRowB, | ||
int | incColB, | ||
double * | buffer | ||
) |
Packs panels from B with padding if needed.
kc | Number of rows to pack |
nc | Number of columns to pack |
B | Pointer to the source matrix B |
incRowB | Increment between consecutive rows of B |
incColB | Increment between consecutive columns of B |
buffer | Pointer to the buffer to store the packed panels |
Definition at line 186 of file dgemm_arr.cpp.
References python.linalg::B and BLOCK_SZ_NR.
void gpmp::linalg::DGEMM::pack_micro_A | ( | int | k, |
const double * | A, | ||
int | incRowA, | ||
int | incColA, | ||
double * | buffer | ||
) |
Packs micro panels of size BLOCK_SZ_MR rows by k columns from A without padding.
k | Number of columns to pack |
A | Pointer to the source matrix A |
incRowA | Increment between consecutive rows of A |
incColA | Increment between consecutive columns of A |
buffer | Pointer to the buffer to store the packed micro panels |
Definition at line 121 of file dgemm_arr.cpp.
References python.linalg::A and BLOCK_SZ_MR.
void gpmp::linalg::DGEMM::pack_micro_B | ( | int | k, |
const double * | B, | ||
int | incRowB, | ||
int | incColB, | ||
double * | buffer | ||
) |
Packs micro panels of size BLOCK_SZ_NR columns by k rows from B without padding.
k | Number of rows to pack |
B | Pointer to the source matrix B |
incRowB | Increment between consecutive rows of B |
incColB | Increment between consecutive columns of B |
buffer | Pointer to the buffer to store the packed micro panels |
Definition at line 169 of file dgemm_arr.cpp.
References python.linalg::B and BLOCK_SZ_NR.