1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/fp16/matrix_fp16.h"
18
MatrixMultiplyFp16(const float16_t * matrix_a,const float16_t * matrix_b,float16_t * matrix_c,int m,int k,int n)19 void MatrixMultiplyFp16(const float16_t *matrix_a, const float16_t *matrix_b, float16_t *matrix_c, int m, int k,
20 int n) {
21 int count = 0;
22 for (int h = 0; h < m; h++) {
23 int h_offset = h * k;
24 for (int w = 0; w < n; w++) {
25 float16_t res = 0;
26 for (int i = 0; i < k; i++) {
27 res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
28 }
29 *(matrix_c + count) = res;
30 count++;
31 }
32 }
33 }
34
35 #ifndef ENABLE_ARM64
MatrixMultiplyWinogradFp16(const float16_t * matix_a,const float16_t * matrix_b,float16_t * matrix_c,int m,int k,int n,int in_channel)36 void MatrixMultiplyWinogradFp16(const float16_t *matix_a, const float16_t *matrix_b, float16_t *matrix_c, int m, int k,
37 int n, int in_channel) {
38 int cnt = 0;
39 for (int i = 0; i < m; ++i) {
40 for (int j = 0; j < n; ++j) {
41 for (int y = 0; y < in_channel; ++y) {
42 float tmp = 0;
43 for (int z = 0; z < k; ++z) {
44 tmp += matix_a[z * in_channel + y + i * in_channel * k] * matrix_b[j + z * n];
45 }
46 matrix_c[cnt++] = tmp;
47 }
48 }
49 }
50 }
51 #endif
52
MatrixMultiplyVecFp16(const float16x8_t * matrix_a,const float16x8_t * matrix_b,float16x8_t * matrix_c,const float16_t * bias,int m,int k,int n)53 void MatrixMultiplyVecFp16(const float16x8_t *matrix_a, const float16x8_t *matrix_b, float16x8_t *matrix_c,
54 const float16_t *bias, int m, int k, int n) {
55 if (bias == NULL) {
56 int count = 0;
57 for (int h = 0; h < m; h++) {
58 int h_offset = h * k;
59 for (int w = 0; w < n; w++) {
60 float16x8_t res = vmovq_n_f16(0);
61 for (int i = 0; i < k; i++) {
62 res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
63 }
64 matrix_c[count] = res;
65 count++;
66 }
67 }
68 } else {
69 int count = 0;
70 float16x8_t bias_ptr = vld1q_f16(bias);
71 for (int h = 0; h < m; h++) {
72 int h_offset = h * k;
73 for (int w = 0; w < n; w++) {
74 float16x8_t res = vmovq_n_f16(0);
75 for (int i = 0; i < k; i++) {
76 res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
77 }
78 matrix_c[count] = vaddq_f16(res, bias_ptr);
79 count++;
80 }
81 }
82 }
83 }
84