/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/fp16/matrix_fp16.h"

// Plain fp16 matrix multiplication: matrix_a (m x k) times matrix_b (k x n),
// both row-major, written row-major into matrix_c (m x n).
void MatrixMultiplyFp16(const float16_t *matrix_a, const float16_t *matrix_b, float16_t *matrix_c, int m, int k,
                        int n) {
  int count = 0;
  for (int h = 0; h < m; h++) {
    int h_offset = h * k;
    for (int w = 0; w < n; w++) {
      float16_t res = 0;
      for (int i = 0; i < k; i++) {
        res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
      }
      *(matrix_c + count) = res;
      count++;
    }
  }
}
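/*
 * Illustrative usage sketch (not part of the original file): multiplying a
 * 2x3 matrix by a 3x2 matrix. The guard macro and example function are
 * hypothetical; only MatrixMultiplyFp16 itself comes from this file.
 */
#ifdef NNACL_MATRIX_FP16_USAGE_SKETCH
static void MatrixMultiplyFp16Example(void) {
  const float16_t a[6] = {1, 2, 3, 4, 5, 6};  // 2x3, row-major
  const float16_t b[6] = {1, 0, 0, 1, 1, 1};  // 3x2, row-major
  float16_t c[4] = {0};                       // 2x2 result
  MatrixMultiplyFp16(a, b, c, 2, 3, 2);
  // c is now {4, 5, 10, 11}.
}
#endif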

#ifndef ENABLE_ARM64
// Winograd transform helper: matrix_a is laid out as [m, k, in_channel] and
// matrix_b as [k, n]; matrix_c receives [m, n, in_channel], i.e. the k
// dimension is contracted independently for every channel.
void MatrixMultiplyWinogradFp16(const float16_t *matrix_a, const float16_t *matrix_b, float16_t *matrix_c, int m,
                                int k, int n, int in_channel) {
  int cnt = 0;
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      for (int y = 0; y < in_channel; ++y) {
        float tmp = 0;
        for (int z = 0; z < k; ++z) {
          tmp += matrix_a[z * in_channel + y + i * in_channel * k] * matrix_b[j + z * n];
        }
        matrix_c[cnt++] = tmp;
      }
    }
  }
}
#endif
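/*
 * Illustrative usage sketch (not part of the original file), mainly to show
 * the per-channel layout: here m = 1, k = 2, n = 2, in_channel = 2, and
 * matrix_b is the 2x2 identity, so the output simply re-tiles matrix_a.
 * The guard macro and example function are hypothetical.
 */
#ifdef NNACL_MATRIX_FP16_USAGE_SKETCH
static void MatrixMultiplyWinogradFp16Example(void) {
  const float16_t a[4] = {1, 2, 3, 4};  // [m=1][k=2][in_channel=2]
  const float16_t b[4] = {1, 0, 0, 1};  // [k=2][n=2] identity
  float16_t c[4] = {0};                 // [m=1][n=2][in_channel=2]
  MatrixMultiplyWinogradFp16(a, b, c, 1, 2, 2, 2);
  // c is now {1, 2, 3, 4}: c[j][y] = sum over z of a[z][y] * b[z][j].
}
#endif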

// Vectorized fp16 matrix multiply for channel-blocked data: every element of
// matrix_a (m x k) and matrix_b (k x n) is an 8-lane fp16 vector, and the
// optional 8-element bias is broadcast onto each output vector.
void MatrixMultiplyVecFp16(const float16x8_t *matrix_a, const float16x8_t *matrix_b, float16x8_t *matrix_c,
                           const float16_t *bias, int m, int k, int n) {
  if (bias == NULL) {
    int count = 0;
    for (int h = 0; h < m; h++) {
      int h_offset = h * k;
      for (int w = 0; w < n; w++) {
        float16x8_t res = vmovq_n_f16(0);
        for (int i = 0; i < k; i++) {
          res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
        }
        matrix_c[count] = res;
        count++;
      }
    }
  } else {
    int count = 0;
    float16x8_t bias_ptr = vld1q_f16(bias);
    for (int h = 0; h < m; h++) {
      int h_offset = h * k;
      for (int w = 0; w < n; w++) {
        float16x8_t res = vmovq_n_f16(0);
        for (int i = 0; i < k; i++) {
          res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
        }
        matrix_c[count] = vaddq_f16(res, bias_ptr);
        count++;
      }
    }
  }
}
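
/*
 * Illustrative usage sketch (not part of the original file): a 1x2-of-vectors
 * times 2x1-of-vectors product with a bias. The guard macro and example
 * function are hypothetical.
 */
#ifdef NNACL_MATRIX_FP16_USAGE_SKETCH
static void MatrixMultiplyVecFp16Example(void) {
  float16_t ones[16];
  float16_t bias[8];
  for (int i = 0; i < 16; ++i) ones[i] = 1;
  for (int i = 0; i < 8; ++i) bias[i] = (float16_t)0.5f;
  float16x8_t a[2] = {vld1q_f16(ones), vld1q_f16(ones + 8)};  // m=1, k=2
  float16x8_t b[2] = {vld1q_f16(ones), vld1q_f16(ones + 8)};  // k=2, n=1
  float16x8_t c[1];                                           // m=1, n=1
  MatrixMultiplyVecFp16(a, b, c, bias, 1, 2, 1);
  // Every lane of c[0] is now 1 * 1 + 1 * 1 + 0.5 = 2.5.
}
#endif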