openGPMP
Open Source Mathematics Package
dgemm_kernel.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 
37 // micro kernel that multiplies panels from A and B
39  double alpha,
40  const double *A,
41  const double *B,
42  double beta,
43  double *C,
44  int incRowC,
45  int incColC) {
46  double AB[BLOCK_SZ_MR * BLOCK_SZ_NR];
47 
48  int i, j, l;
49 
50  // Compute AB = A*B
51  for (l = 0; l < BLOCK_SZ_MR * BLOCK_SZ_NR; ++l) {
52  AB[l] = 0;
53  }
54  for (l = 0; l < kc; ++l) {
55  for (j = 0; j < BLOCK_SZ_NR; ++j) {
56  for (i = 0; i < BLOCK_SZ_MR; ++i) {
57  AB[i + j * BLOCK_SZ_MR] += A[i] * B[j];
58  }
59  }
60  A += BLOCK_SZ_MR;
61  B += BLOCK_SZ_NR;
62  }
63 
64  // Update C <- beta*C
65  if (fabs(beta - 0.0) < std::numeric_limits<double>::epsilon()) {
66  for (j = 0; j < BLOCK_SZ_NR; ++j) {
67  for (i = 0; i < BLOCK_SZ_MR; ++i) {
68  C[i * incRowC + j * incColC] = 0.0;
69  }
70  }
71  } else if (fabs(beta - 1.0) > std::numeric_limits<double>::epsilon()) {
72  for (j = 0; j < BLOCK_SZ_NR; ++j) {
73  for (i = 0; i < BLOCK_SZ_MR; ++i) {
74  C[i * incRowC + j * incColC] *= beta;
75  }
76  }
77  }
78 
79  // Update C <- C + alpha*AB. Note: the case alpha == 0.0 has already been
80  // handled in the layer above (dgemm_nn), so only alpha == 1.0 and the
81  // general case are distinguished here.
82  if (fabs(alpha - 1.0) < std::numeric_limits<double>::epsilon()) {
83  for (j = 0; j < BLOCK_SZ_NR; ++j) {
84  for (i = 0; i < BLOCK_SZ_MR; ++i) {
85  C[i * incRowC + j * incColC] += AB[i + j * BLOCK_SZ_MR];
86  }
87  }
88  }
89 
90  else {
91  for (j = 0; j < BLOCK_SZ_NR; ++j) {
92  for (i = 0; i < BLOCK_SZ_MR; ++i) {
93  C[i * incRowC + j * incColC] += alpha * AB[i + j * BLOCK_SZ_MR];
94  }
95  }
96  }
97 }
#define BLOCK_SZ_MR
Definition: _dgemm.hpp:40
#define BLOCK_SZ_NR
Definition: _dgemm.hpp:41
void dgemm_micro_kernel(int kc, double alpha, const double *A, const double *B, double beta, double *C, int incRowC, int incColC)
Computes the micro kernel that multiplies panels from A and B.
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23