• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_NNACL_FP32_MATMUL_H_
18 #define MINDSPORE_NNACL_FP32_MATMUL_H_
19 
20 #include <float.h>
21 #include <string.h>
22 #include "nnacl/errorcode.h"
23 #include "nnacl/matmul_parameter.h"
24 #include "nnacl/op_base.h"
25 
/*
 * ADD_BIAS(value, bias, c): when `bias` is not NULL, adds element `c` of `bias`
 * to the lvalue `value`; when `bias` is NULL, `value` is left untouched.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * `bias + n`) keep their meaning after expansion, and the guarded statement is
 * braced. The expansion is deliberately still a bare `if` statement, so it can
 * be invoked with or without a trailing semicolon exactly like before; for the
 * same reason it must not be used as the unbraced body of an `if` that has an
 * `else`. `value` and `bias` are each evaluated more than once, so do not pass
 * arguments with side effects.
 */
#define ADD_BIAS(value, bias, c)     \
  if ((bias) != NULL) {              \
    (value) = (value) + (bias)[(c)]; \
  }
28 
/*
 * DO_RELU(value, act_type): when `act_type` equals ActType_Relu, clamps the
 * lvalue `value` from below at 0.0f via MSMAX; any other `act_type` leaves
 * `value` untouched.
 *
 * Arguments are parenthesized so that an expression such as `x ? A : B` passed
 * as `act_type` is compared as a whole, and the guarded statement is braced.
 * The expansion is still a bare `if` statement, so it can be invoked with or
 * without a trailing semicolon exactly like before; do not use it as the
 * unbraced body of an `if` that has an `else`. `value` is evaluated more than
 * once (and MSMAX may evaluate it again), so do not pass arguments with side
 * effects.
 */
#define DO_RELU(value, act_type)       \
  if ((act_type) == ActType_Relu) {    \
    (value) = MSMAX(0.0f, (value));    \
  }
31 
/*
 * DO_RELU6(value, act_type): when `act_type` equals ActType_Relu6, clamps the
 * lvalue `value` into [0.0f, 6.0f] (upper bound first via MSMIN, then lower
 * bound via MSMAX); any other `act_type` leaves `value` untouched.
 *
 * Both clamps live inside ONE braced `if`, so the macro expands to a single
 * statement: placing it under an unbraced `if`/`for` guards the whole clamp
 * instead of only its first half, and `act_type` is evaluated once. Arguments
 * are parenthesized so expression arguments keep their meaning. The expansion
 * is still a bare `if` statement, so it can be invoked with or without a
 * trailing semicolon exactly like before; do not use it as the unbraced body
 * of an `if` that has an `else`. `value` is evaluated more than once, so do
 * not pass arguments with side effects.
 */
#define DO_RELU6(value, act_type)      \
  if ((act_type) == ActType_Relu6) {   \
    (value) = MSMIN(6.0f, (value));    \
    (value) = MSMAX(0.0f, (value));    \
  }
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
/*
 * Portable fp32 matrix-multiply entry points. These are declarations only; the
 * definitions are not part of this header.
 *
 * In both functions `a`, `b` and `bias` are const-qualified (read-only) and `c`
 * is the only non-const pointer parameter.
 *
 * NOTE(review): presumably c = activation(a * b + bias), with `deep` / `depth`
 * the shared inner dimension and `stride` / `out_type` selecting how results
 * are laid out in `c` -- confirm against the implementation.
 * NOTE(review): MatMulOpt takes `ActType act_type` while MatVecMulFp32 takes
 * `int act_type`; presumably the same enum values are passed -- confirm.
 */
39 void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row,
40                int col, size_t stride, int out_type);
41 void MatVecMulFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col);
42 
/*
 * Layout-conversion helpers (declarations only; definitions are not in this
 * header). Every function takes a read-only source buffer `src_ptr`, a writable
 * destination buffer `dst_ptr`, and the `row` / `col` dimensions.
 *
 * NOTE(review): judging by the names, the source is row-major and the numeric
 * suffix (4/6/8/12/16/32) is the tile width of the packed Row-/Col-major
 * destination. Whether the destination is padded up to a multiple of that
 * width, and therefore how large `dst_ptr` must be, is not visible here --
 * confirm against the implementation before sizing buffers.
 */
43 void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col);
44 void RowMajor2Row4Major(const float *src_ptr, float *dst_ptr, int row, int col);
45 void RowMajor2Row6Major(const float *src_ptr, float *dst_ptr, int row, int col);
46 void RowMajor2Row8Major(const float *src_ptr, float *dst_ptr, int row, int col);
47 void RowMajor2Row12Major(const float *src_ptr, float *dst_ptr, int row, int col);
48 void RowMajor2Row16Major(const float *src_ptr, float *dst_ptr, int row, int col);
49 void RowMajor2Row32Major(const float *src_ptr, float *dst_ptr, int row, int col);
50 void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col);
51 void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col);
52 void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col);
53 void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col);
54 void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col);
55 void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col);
56 
57 #ifdef ENABLE_ARM64
/*
 * Kernels declared only when ENABLE_ARM64 is defined (declarations only; the
 * definitions are not in this header).
 *
 * All of them take read-only `a`, `b`, `bias` and a writable `c`, with
 * `act_type` passed as a plain int. MatmulFloatNeon64 takes two separate
 * size_t flags (`writeNhwc`, `WriteWino`); the Opt / OptRowN variants take a
 * single size_t `write_mode` instead. MatVecMulFp32Neon64 has the parameters
 * of MatVecMulFp32 plus a trailing `align_col`.
 *
 * NOTE(review): presumably hand-written NEON kernels where the RowN suffix is
 * the row-tile height and `stride` is the output row stride -- confirm against
 * the implementation.
 */
58 void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
59                        int col, size_t stride, size_t writeNhwc, size_t WriteWino);
60 void MatmulFloatNeon64Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
61                           int col, size_t stride, size_t write_mode);
62 void MatmulFloatNeon64OptRow8(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
63                               int row, int col, size_t stride, size_t write_mode);
64 void MatmulFloatNeon64OptRow4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
65                               int row, int col, size_t stride, size_t write_mode);
66 void MatmulFloatNeon64OptRow12(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
67                                int row, int col, size_t stride, size_t write_mode);
68 void MatVecMulFp32Neon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col,
69                          int align_col);
70 
71 #elif defined(ENABLE_ARM32)
/*
 * Kernels declared only when ENABLE_ARM64 is not defined and ENABLE_ARM32 is
 * (declarations only; the definitions are not in this header).
 *
 * Parameter types differ from the ARM64 declarations in this header: `stride`
 * is int here (size_t there), and the Opt variants take `write_mode` as int
 * (size_t there). MatmulFloatNeon32 still takes `writeNhwc` / `WriteWino` as
 * size_t.
 *
 * NOTE(review): presumably the 12x4 suffix is the tile shape used by the
 * kernel -- confirm against the implementation.
 */
72 void MatmulFloatNeon32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
73                        int col, int stride, size_t writeNhwc, size_t WriteWino);
74 void MatmulFloatNeon32Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
75                           int col, int stride, int write_mode);
76 void MatmulFloatNeon32Opt12x4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
77                               int row, int col, int stride, int write_mode);
78 
79 #elif defined(ENABLE_AVX)
/*
 * Declarations compiled only when neither ARM macro is defined and ENABLE_AVX
 * is (declarations only; the definitions are not in this header).
 *
 * DeconvAvxKernel and MatVecMulKernel are function-pointer types. The four
 * MatVecMul1xNKernel functions (N = 32/24/16/8) have exactly the MatVecMulKernel
 * signature, so they can be stored in a variable of that type. Unlike the other
 * kernels in this header, MatmulFloatAvxOpt takes `act_type`, `depth`, `row`
 * and `col` as size_t, and the MatVecMul kernels put `dst` first.
 *
 * NOTE(review): `col_algin` looks like a misspelling of `col_align` (the
 * spelling MatVecMulAvxFp32 uses for what is presumably the same quantity).
 * Renaming a prototype parameter is source-compatible for callers, but should
 * be done together with the definitions.
 * NOTE(review): presumably MatVecMulAvxFp32 dispatches to the 1xN kernels by
 * remaining column count -- confirm against the implementation.
 */
80 typedef void (*DeconvAvxKernel)(const float *src, const float *weight, float *dst, int col, int row, int depth,
81                                 int stride);
82 void DeconvMatmulAvx(const float *a, const float *b, float *c, int depth, int row, int col, int kernel_plane);
83 void MatmulFloatAvxOpt(const float *a, const float *b, float *c, const float *bias, size_t act_type, size_t depth,
84                        size_t row, size_t col, size_t stride, size_t write_mode);
85 typedef void (*MatVecMulKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
86                                 size_t row_block, size_t col_block, size_t col_algin, size_t deep);
87 void MatVecMulAvxFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int cur_col,
88                       int col_align);
89 void MatVecMul1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
90                          size_t row_block, size_t col_block, size_t col_algin, size_t deep);
91 void MatVecMul1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
92                          size_t row_block, size_t col_block, size_t col_algin, size_t deep);
93 void MatVecMul1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
94                          size_t row_block, size_t col_block, size_t col_algin, size_t deep);
95 void MatVecMul1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
96                         size_t row_block, size_t col_block, size_t col_algin, size_t deep);
97 #ifdef ENABLE_DEBUG
/*
 * Declared only when ENABLE_DEBUG is defined inside the ENABLE_AVX branch. The
 * signature is identical to the DeconvAvxKernel function-pointer type declared
 * earlier in this header.
 * NOTE(review): presumably a generic (any col x row) fallback used for
 * debugging -- confirm against the implementation.
 */
98 void DeconvColXRowAvxKernel(const float *src, const float *weight, float *dst, int col, int row, int depth, int stride);
99 
/*
 * Declared only when ENABLE_DEBUG is defined inside the ENABLE_AVX branch. The
 * signature is identical to the MatVecMulKernel function-pointer type declared
 * earlier in this header.
 * NOTE(review): `col_algin` looks like a misspelling of `col_align`; presumably
 * this is a generic row x col fallback used for debugging -- confirm against
 * the implementation.
 */
100 void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag,
101                             size_t row_block, size_t col_block, size_t col_algin, size_t deep);
102 #endif
103 
104 #elif defined(ENABLE_SSE)
/*
 * Declarations compiled only when none of ENABLE_ARM64, ENABLE_ARM32 and
 * ENABLE_AVX is defined and ENABLE_SSE is (declarations only; the definitions
 * are not in this header).
 *
 * DeconvMatmulFloatSse takes no bias or activation argument; MatmulFloatSse64Opt
 * takes `stride` and `write_mode` as int.
 * NOTE(review): presumably `write_mode` selects the output layout in the same
 * way as in the NEON/AVX Opt kernels -- confirm against the implementation.
 */
105 void DeconvMatmulFloatSse(const float *a, const float *b, float *c, int depth, int row, int col);
106 void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
107                          int col, int stride, int write_mode);
108 #endif
109 
/*
 * Declared unconditionally (outside the platform #ifdef chain); the definition
 * is not in this header. `a`, `b` and `bias` are read-only, `dst` is writable,
 * and `act_type` uses the ActType type.
 * NOTE(review): the name suggests the inputs are packed in 12-row / 8-column
 * tiles and `out_type` selects the output layout -- confirm against the
 * implementation.
 */
110 void MatMul12x8(const float *a, const float *b, float *dst, const float *bias, ActType act_type, int deep, int row,
111                 int col, int stride, int out_type);
112 
113 #ifdef __cplusplus
114 }
115 #endif
116 #endif  // MINDSPORE_NNACL_FP32_MATMUL_H_
117