/third_party/mindspore/mindspore/lite/micro/coder/wrapper/int8/ |
D | conv1x1_init_int8_wrapper.c | 22 int32_t output_channel, int32_t input_zp, bool support_optimize, bool filter_peroc, in Conv1x1Init() argument 30 size_t size = UP_ROUND(input_channel, C16NUM) * UP_ROUND(output_channel, C2NUM) * sizeof(int8_t); in Conv1x1Init() 36 RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel); in Conv1x1Init() 38 size = (size_t)UP_ROUND(output_channel, C2NUM); in Conv1x1Init() 46 memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t)); in Conv1x1Init() 51 …size_t size = support_optimize ? UP_ROUND(input_channel, C4NUM) * UP_ROUND(output_channel, C16NUM)… in Conv1x1Init() 52 … : UP_ROUND(input_channel, C16NUM) * UP_ROUND(output_channel, C4NUM) * sizeof(int8_t); in Conv1x1Init() 59 RowMajor2Row4x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel); in Conv1x1Init() 61 RowMajor2Row16x4MajorInt8(src_weight, packed_weight_, output_channel, input_channel); in Conv1x1Init() 64 size = support_optimize ? UP_ROUND(output_channel, C16NUM) : UP_ROUND(output_channel, C4NUM); in Conv1x1Init() [all …]
|
D | conv_init_int8_wrapper.c | 23 … int kernel_w, int input_channel, int output_channel, int32_t input_zp, bool filter_peroc, in ConvInit() argument 31 up_round_oc = UP_ROUND(output_channel, C2NUM); in ConvInit() 35 up_round_oc = UP_ROUND(output_channel, C8NUM); in ConvInit() 38 up_round_oc = UP_ROUND(output_channel, C4NUM); in ConvInit() 52 …RowMajor2Row2x16MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pl… in ConvInit() 55 …RowMajor2Row8x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pla… in ConvInit() 57 …RowMajor2Row16x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pl… in ConvInit() 69 memcpy(bias_data_, ori_bias, (unsigned int)output_channel * sizeof(int32_t)); in ConvInit() 72 for (int oc = 0; oc < output_channel; oc++) { in ConvInit()
|
/third_party/mindspore/tests/st/fusion/ |
D | test_conv_bn1_fusion.py | 26 output_channel = 512 variable 60 self.conv = nn.Conv2d(input_channel, output_channel, 62 self.conv1 = nn.Conv2d(input_channel, output_channel, 64 self.bn = nn.BatchNorm2d(output_channel, momentum=0.1, eps=0.0001) 69 self.dense = nn.Dense(output_channel, num_class) 77 output = self.reshape(output, (batch_size, output_channel)) 91 self.conv = nn.Conv2d(input_channel, output_channel, 93 self.bn = nn.BatchNorm2d(output_channel, momentum=0.1, eps=0.0001) 97 self.dense = nn.Dense(output_channel, num_class) 104 output = self.reshape(output, (batch_size, output_channel)) [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | common_func_fp32.c | 19 …stConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_channel, in PostConvFuncComm() argument 24 for (size_t oc = 0; oc < output_channel; oc++) { in PostConvFuncComm() 42 …nvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel, in PostConvFuncFp32C8() argument 45 …PostConvFuncComm(c8_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, plane_size, stride, re… in PostConvFuncFp32C8() 47 size_t oc8mod = output_channel % C8NUM; in PostConvFuncFp32C8() 48 size_t oc8div = output_channel - oc8mod; in PostConvFuncFp32C8() 55 …nvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel, in PostConvFuncFp32C4() argument 58 size_t oc4mod = output_channel % C4NUM; in PostConvFuncFp32C4() 59 size_t oc4div = output_channel - oc4mod; in PostConvFuncFp32C4() 63 …PostConvFuncComm(c4_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, plane_stride, output_c… in PostConvFuncFp32C4()
|
D | winograd_transform.c | 104 int output_channel = conv_param->output_channel_; in WinogradOutputNHWCTransform() local 106 int oc4 = UP_DIV(output_channel, C4NUM); in WinogradOutputNHWCTransform() 108 int oc8 = UP_DIV(output_channel, C8NUM); in WinogradOutputNHWCTransform() 125 int dst_tile_offset = output_channel * (dst_x_s + dst_y_s * output_w); in WinogradOutputNHWCTransform() 131 int r_c = output_channel - j * C4NUM; in WinogradOutputNHWCTransform() 138 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 143 int r_c = output_channel - j * C8NUM; in WinogradOutputNHWCTransform() 150 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 164 int output_channel = conv_param->output_channel_; in WinogradOutputNC4HW4Transform() local 166 int oc4 = UP_DIV(output_channel, C4NUM); in WinogradOutputNC4HW4Transform() [all …]
|
D | deconv_fp32.c | 19 void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, i… in PackDeConvWeightFp32() argument 22 for (int oc = 0; oc < output_channel; oc++) { in PackDeConvWeightFp32() 27 int src_index = ic * plane * output_channel + hw * output_channel + oc; in PackDeConvWeightFp32() 36 …d DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel, in DeConvPostFp32C8() argument 43 int oc8 = UP_ROUND(output_channel, C8NUM); in DeConvPostFp32C8() 108 …PostConvFuncFp32C8(tmp, dst, bias, output_channel, output_plane, conv_param->output_channel_, conv… in DeConvPostFp32C8()
|
D | deconv_fp32.h | 30 void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, i… 31 …ConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel,
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/int8/ |
D | convolution_int8.cc | 53 auto output_channel = filter_tensor->Batch(); in InitWeightBias() local 56 conv_param_->output_channel_ = output_channel; in InitWeightBias() 60 up_round_oc = UP_ROUND(output_channel, C2NUM); in InitWeightBias() 64 up_round_oc = UP_ROUND(output_channel, C8NUM); in InitWeightBias() 67 up_round_oc = UP_ROUND(output_channel, C4NUM); in InitWeightBias() 85 …RowMajor2Row2x16MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pl… in InitWeightBias() 88 …RowMajor2Row8x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pla… in InitWeightBias() 90 …RowMajor2Row16x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_pl… in InitWeightBias() 104 memcpy(bias_data_, ori_bias, static_cast<size_t>(output_channel) * sizeof(int32_t)); in InitWeightBias() 111 filter_zp_ptr_ = reinterpret_cast<int32_t *>(malloc(output_channel * sizeof(int32_t))); in InitWeightBias() [all …]
|
D | convolution_1x1_int8.cc | 154 …lution1x1Int8CPUKernel::InitBiasByzp(const void *src_weight, int input_channel, int output_channel, in InitBiasByzp() argument 162 for (int oc = 0; oc < output_channel; oc++) { in InitBiasByzp() 178 for (int fi = 0; fi < output_channel; fi++) { in InitBiasByzp() 188 memcpy(left_shift_, conv_param_->conv_quant_arg_.left_shift_, output_channel * sizeof(int32_t)); in InitBiasByzp() 196 … memcpy(right_shift_, conv_param_->conv_quant_arg_.right_shift_, output_channel * sizeof(int32_t)); in InitBiasByzp() 204 …memcpy(multiplier_, conv_param_->conv_quant_arg_.quant_multiplier_, output_channel * sizeof(int32_… in InitBiasByzp() 221 auto output_channel = filter_tensor->Batch(); in InitWeightBias() local 222 if (output_channel < 0) { in InitWeightBias() 227 …size_t size = support_optimize_ ? UP_ROUND(input_channel, C4NUM) * UP_ROUND(output_channel, C16NUM… in InitWeightBias() 228 … : UP_ROUND(input_channel, C16NUM) * UP_ROUND(output_channel, C4NUM) * sizeof(int8_t); in InitWeightBias() [all …]
|
D | convolution_3x3_int8.cc | 32 auto output_channel = conv_param->output_channel_; in ProcessFilterUint8() local 36 size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t); in ProcessFilterUint8() 43 Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane); in ProcessFilterUint8() 88 auto output_channel = filter_tensor->Batch(); in InitWeightBias() local 89 if (output_channel < 0) { in InitWeightBias() 94 conv_param_->output_channel_ = output_channel; in InitWeightBias() 96 int oC4 = UP_DIV(output_channel, C4NUM); in InitWeightBias() 125 memcpy(bias_data_, ori_bias_addr, static_cast<size_t>(output_channel) * sizeof(int32_t)); in InitWeightBias()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/ |
D | deconv_int8.c | 20 …vPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, in DeConvPostInt8C4() argument 26 int oc4 = UP_DIV(output_channel, C4NUM); in DeConvPostInt8C4() 84 PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_, in DeConvPostInt8C4() 91 void DeConvWeightTransInt8(const int8_t *src, int8_t *dst, int input_channel, int output_channel, i… in DeConvWeightTransInt8() argument 95 int oc4 = UP_ROUND(output_channel, C4NUM); in DeConvWeightTransInt8() 98 for (int oc = 0; oc < output_channel; oc++) { in DeConvWeightTransInt8() 101 int src_index = ic * output_channel * plane + hw * output_channel + oc; in DeConvWeightTransInt8() 145 …onvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, in DeConvPostInt8() argument 148 int error_code = DeConvPostInt8C4(src, bias, tmp, out, output_channel, conv_param); in DeConvPostInt8()
|
D | deconv_int8.h | 34 void DeConvWeightTransInt8(const int8_t *src, int8_t *dst, int input_channel, int output_channel, i… 40 …onvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
|
/third_party/mindspore/mindspore/lite/micro/coder/opcoders/nnacl/int8/ |
D | conv2d_1x1_int8_coder.cc | 153 int32_t output_channel = filter_tensor_->Batch(); in InitWeightBias() local 156 MS_CHECK_TRUE(output_channel > 0, "output_channel should be positive"); in InitWeightBias() 177 … output_channel, input_zp, "GetSupportOptFlag()", filter_peroc_, packed_weight_str, in InitWeightBias() 181 … output_channel, input_zp, support_optimize_, filter_peroc_, packed_weight_str, in InitWeightBias() 190 int32_t output_channel = filter_tensor_->Batch(); in InitFilterPeroc() local 193 round_oc = UP_ROUND(output_channel, C2NUM); in InitFilterPeroc() 195 round_oc = MSMAX(UP_ROUND(output_channel, C16NUM), UP_ROUND(output_channel, C4NUM)); in InitFilterPeroc() 198 MS_CHECK_TRUE(conv_quant_arg_->filter_arg_num_ == static_cast<size_t>(output_channel), in InitFilterPeroc() 200 size_t output_size = output_channel * sizeof(int32_t); in InitFilterPeroc() 207 for (int fi = 0; fi < output_channel; fi++) { in InitFilterPeroc()
|
D | conv2d_3x3_int8_coder.cc | 29 int output_channel = conv_param->output_channel_; in ProcessFilterUint8() local 33 size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t); in ProcessFilterUint8() 43 Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane); in ProcessFilterUint8() 49 int output_channel = conv_param_->output_channel_; in InitWeightBias() local 51 MS_CHECK_TRUE(output_channel > 0, "invalid output_channel"); in InitWeightBias() 53 int oC4 = UP_DIV(output_channel, C4NUM); in InitWeightBias() 71 …MS_CHECK_RET_CODE(memcpy_s(new_bias_addr_, new_bias_size, ori_bias_addr, output_channel * sizeof(i… in InitWeightBias()
|
D | conv2d_int8_coder.cc | 88 int32_t output_channel = filter_tensor_->Batch(); in InitWeightBias() local 92 conv_param_->output_channel_ = output_channel; in InitWeightBias() 93 auto output_channel_size = static_cast<size_t>(output_channel); in InitWeightBias() 105 for (int oc = 0; oc < output_channel; oc++) { in InitWeightBias() 113 up_round_oc = UP_ROUND(output_channel, C2NUM); in InitWeightBias() 116 up_round_oc = MSMAX(UP_ROUND(output_channel, C8NUM), UP_ROUND(output_channel, C4NUM)); in InitWeightBias() 119 up_round_oc = UP_ROUND(output_channel, C8NUM); in InitWeightBias() 154 … input_channel, output_channel, input_zp, filter_peroc_, "GetSupportOptFlag()", in InitWeightBias() 158 … input_channel, output_channel, input_zp, filter_peroc_, support_optimize_, in InitWeightBias()
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32/ |
D | convolution_slidewindow_fp32.cc | 43 auto output_channel = filter_tensor->Batch(); in Init() local 47 int oc_block_num = UP_DIV(output_channel, oc_tile_); in Init() 191 auto output_channel = filter_tensor->Batch(); in PackWeight() local 194 int oc_block_num = UP_DIV(output_channel, oc_tile_); in PackWeight() 197 PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, in PackWeight() 204 auto output_channel = filter_tensor->Batch(); in MallocWeightBiasData() local 208 conv_param_->output_channel_ = output_channel; in MallocWeightBiasData() 210 int oc_block_num = UP_DIV(output_channel, oc_tile_); in MallocWeightBiasData()
|
D | convolution_1x1_fp32.cc | 123 auto output_channel = filter_tensor->Batch(); in Init() local 124 int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float); in Init() 278 auto output_channel = filter_tensor->Batch(); in PackWeight() local 279 if (output_channel < 0) { in PackWeight() 288 output_channel, input_channel); in PackWeight() 291 output_channel, input_channel); in PackWeight() 294 output_channel, input_channel); in PackWeight() 301 auto output_channel = filter_tensor->Batch(); in MallocWeightBiasData() local 302 int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float); in MallocWeightBiasData() 312 size = UP_ROUND(output_channel, col_tile_) * sizeof(float); in MallocWeightBiasData()
|
D | deconvolution_fp32.cc | 59 auto output_channel = weight_tensor->Channel(); in MallocWeightBiasData() local 62 int output_aligned_size = UP_ROUND(output_channel, C8NUM); in MallocWeightBiasData() 84 auto output_channel = weight_tensor->Channel(); in PackWeight() local 91 input_channel, kernel_w * kernel_h, output_channel); in PackWeight() 94 input_channel, kernel_w * kernel_h, output_channel); in PackWeight() 179 auto output_channel = weight_tensor->Channel(); in Init() local 182 int output_aligned_size = UP_ROUND(output_channel, C8NUM); in Init()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/sse/ |
D | ConvDwFp32Row_sse.c | 22 size_t output_channel, size_t input_step) { in ConvDwFp32Row() argument 23 size_t out_c16 = DOWN_DIV(output_channel, C16NUM) * C16NUM; in ConvDwFp32Row() 24 size_t out_c8 = DOWN_DIV(output_channel, C8NUM) * C8NUM; in ConvDwFp32Row() 25 size_t out_c4 = DOWN_DIV(output_channel, C4NUM) * C4NUM; in ConvDwFp32Row() 80 for (; out_c < output_channel; out_c++) { in ConvDwFp32Row()
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp16/ |
D | convolution_1x1_fp16.cc | 86 auto output_channel = weight_tensor->Batch(); in MallocWeightBiasData() local 88 size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); in MallocWeightBiasData() 101 size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); in MallocWeightBiasData() 117 auto output_channel = weight_tensor->Batch(); in PackWeight() local 122 …orFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), input_channel, output_channel, in PackWeight() 126 … reinterpret_cast<float16_t *>(packed_weight_), output_channel, input_channel); in PackWeight() 129 …orFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), input_channel, output_channel, in PackWeight() 154 auto output_channel = weight_tensor->Batch(); in Init() local 155 size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); in Init()
|
D | deconvolution_fp16.cc | 59 auto output_channel = weight_tensor->Channel(); in PackWeight() local 65 input_channel, kernel_w * kernel_h, output_channel); in PackWeight() 71 auto output_channel = weight_tensor->Channel(); in MallocWeightBiasData() local 74 …size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * … in MallocWeightBiasData() 83 auto bias_size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); in MallocWeightBiasData() 89 memset(bias_data_, 0, UP_ROUND(output_channel, C8NUM) * sizeof(float16_t)); in MallocWeightBiasData() 193 auto output_channel = weight_tensor->Channel(); in Init() local 196 …size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * … in Init()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/ |
D | winograd_transform_fp16.c | 134 int output_channel = conv_param->output_channel_; in WinogradOutputNHWCTransformFp16() local 135 int oc8 = UP_DIV(output_channel, C8NUM); in WinogradOutputNHWCTransformFp16() 151 int dst_tile_offset = output_channel * (dst_x_s + dst_y_s * output_w); in WinogradOutputNHWCTransformFp16() 154 int r_c = output_channel - j * C8NUM; in WinogradOutputNHWCTransformFp16() 161 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransformFp16() 174 int output_channel = conv_param->output_channel_; in WinogradOutputNC8HW8TransformFp16() local 175 int oc8 = UP_DIV(output_channel, C8NUM); in WinogradOutputNC8HW8TransformFp16() 194 int r_c = output_channel - j * C8NUM; in WinogradOutputNC8HW8TransformFp16()
|
D | deconv_winograd_fp16.c | 245 int output_channel = conv_param->output_channel_; in PackDeConvWgDataFp16() local 246 int size = conv_param->input_channel_ * output_channel * tmp_kernel_plane; in PackDeConvWgDataFp16() 252 const float16_t *src_ic = nhwc_weight + deconv_param->kernel_plane_ * output_channel * ic; in PackDeConvWgDataFp16() 253 float16_t *dst_ic = current_unit_weight + tmp_kernel_plane * output_channel * ic; in PackDeConvWgDataFp16() 258 …loat16_t *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * output_channel; in PackDeConvWgDataFp16() 259 float16_t *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * output_channel; in PackDeConvWgDataFp16() 260 memcpy(dst_hw, src_hw, output_channel * sizeof(float16_t)); in PackDeConvWgDataFp16() 301 size = conv_param->input_channel_ * output_channel * unit->winograd_.kh_ * unit->winograd_.kw_; in PackDeConvWgDataFp16() 314 … unit->winograd_.kh_, unit->h_size_, output_channel, conv_param->input_channel_, false); in PackDeConvWgDataFp16() 328 for (int oc = 0; oc < output_channel; oc++) { in PackDeConvWgDataFp16() [all …]
|
D | deconv_fp16.c | 19 …16(const float16_t *src, float16_t *tmp, const float16_t *bias, float16_t *dst, int output_channel, in DeConvPostFp16() argument 25 int oc8 = UP_ROUND(output_channel, C8NUM); in DeConvPostFp16() 82 …PostConvFuncFp16C8(tmp, dst, bias, output_channel, output_plane, conv_param->output_channel_, conv… in DeConvPostFp16()
|
D | common_func_fp16.h | 27 size_t output_channel, size_t plane_size, size_t stride, ActType act_type); 32 …cFp16C4(const float16_t *c4_out, float16_t *nhwc_out, const float16_t *bias, size_t output_channel,
|