1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_NNACL_FP32_MATMUL_H_ 18 #define MINDSPORE_NNACL_FP32_MATMUL_H_ 19 20 #include <float.h> 21 #include <string.h> 22 #include "nnacl/errorcode.h" 23 #include "nnacl/matmul_parameter.h" 24 #include "nnacl/op_base.h" 25 26 #define ADD_BIAS(value, bias, c) \ 27 if (bias != NULL) value = value + bias[c]; 28 29 #define DO_RELU(value, act_type) \ 30 if (act_type == ActType_Relu) value = MSMAX(0.0f, value); 31 32 #define DO_RELU6(value, act_type) \ 33 if (act_type == ActType_Relu6) value = MSMIN(6.0f, value); \ 34 if (act_type == ActType_Relu6) value = MSMAX(0.0f, value); 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row, 40 int col, size_t stride, int out_type); 41 void MatVecMulFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col); 42 43 void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col); 44 void RowMajor2Row4Major(const float *src_ptr, float *dst_ptr, int row, int col); 45 void RowMajor2Row6Major(const float *src_ptr, float *dst_ptr, int row, int col); 46 void RowMajor2Row8Major(const float *src_ptr, float *dst_ptr, int row, int col); 47 void RowMajor2Row12Major(const float *src_ptr, float *dst_ptr, int row, int col); 48 void RowMajor2Row16Major(const float *src_ptr, float *dst_ptr, int row, int col); 49 void RowMajor2Row32Major(const float *src_ptr, float *dst_ptr, int row, int col); 50 void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col); 51 void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col); 52 void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col); 53 void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col); 54 void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col); 55 void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col); 56 57 #ifdef ENABLE_ARM64 58 void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, 59 int col, size_t stride, size_t writeNhwc, size_t WriteWino); 60 void MatmulFloatNeon64Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, 61 int col, size_t stride, size_t write_mode); 62 void MatmulFloatNeon64OptRow8(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, 63 int row, int col, size_t stride, size_t write_mode); 64 void MatmulFloatNeon64OptRow4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, 65 int row, int col, size_t stride, size_t write_mode); 66 void MatmulFloatNeon64OptRow12(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, 67 int row, int col, size_t stride, size_t write_mode); 68 void MatVecMulFp32Neon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col, 69 int align_col); 70 71 #elif defined(ENABLE_ARM32) 72 void MatmulFloatNeon32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, 73 int col, int stride, size_t writeNhwc, size_t WriteWino); 74 void MatmulFloatNeon32Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, 75 int col, int stride, int write_mode); 76 void MatmulFloatNeon32Opt12x4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, 77 int row, int col, int stride, int write_mode); 78 79 #elif defined(ENABLE_AVX) 80 typedef void (*DeconvAvxKernel)(const float *src, const float *weight, float *dst, int col, int row, int depth, 81 int stride); 82 void DeconvMatmulAvx(const float *a, const float *b, float *c, int depth, int row, int col, int kernel_plane); 83 void MatmulFloatAvxOpt(const float *a, const float *b, float *c, const float *bias, size_t act_type, size_t depth, 84 size_t row, size_t col, size_t stride, size_t write_mode); 85 typedef void (*MatVecMulKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 86 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 87 void MatVecMulAvxFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int cur_col, 88 int col_align); 89 void MatVecMul1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 90 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 91 void MatVecMul1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 92 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 93 void MatVecMul1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 94 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 95 void MatVecMul1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 96 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 97 #ifdef ENABLE_DEBUG 98 void DeconvColXRowAvxKernel(const float *src, const float *weight, float *dst, int col, int row, int depth, int stride); 99 100 void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, const float *bias, size_t act_flag, 101 size_t row_block, size_t col_block, size_t col_algin, size_t deep); 102 #endif 103 104 #elif defined(ENABLE_SSE) 105 void DeconvMatmulFloatSse(const float *a, const float *b, float *c, int depth, int row, int col); 106 void MatmulFloatSse64Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, 107 int col, int stride, int write_mode); 108 #endif 109 110 void MatMul12x8(const float *a, const float *b, float *dst, const float *bias, ActType act_type, int deep, int row, 111 int col, int stride, int out_type); 112 113 #ifdef __cplusplus 114 } 115 #endif 116 #endif // MINDSPORE_NNACL_FP32_MATMUL_H_ 117