/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | winograd_avx.c | 26 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x2AvxUnit() local 34 …] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2AvxUnit() 35 …MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2AvxUnit() 59 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x2ReluAvxUnit() local 67 …] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2ReluAvxUnit() 68 …MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2ReluAvxUnit() 95 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x2Relu6AvxUnit() local 103 …] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2Relu6AvxUnit() 104 …MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2Relu6AvxUnit() 131 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x3AvxUnit() local [all …]
|
D | common_func_fp32.c | 19 void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_c… in PostConvFuncComm() argument 31 if (bias_ptr != NULL) { in PostConvFuncComm() 32 value = value + bias_ptr[oc]; in PostConvFuncComm() 42 void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t outp… in PostConvFuncFp32C8() argument 45 …PostConvFuncComm(c8_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, plane_size, stride, re… in PostConvFuncFp32C8() 50 …PostFuncBiasReluC8(out_ptr, c8_out_ptr, bias_ptr, oc8div, oc8mod, plane_size, stride_size, relu_ty… in PostConvFuncFp32C8() 55 void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t outp… in PostConvFuncFp32C4() argument 61 …PostFuncBiasReluC4(out_ptr, c4_out_ptr, bias_ptr, oc4div, oc4mod, plane_size, stride_size, relu_ty… in PostConvFuncFp32C4() 63 …PostConvFuncComm(c4_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, plane_stride, output_c… in PostConvFuncFp32C4()
|
D | winograd_utils.c | 334 MS_FLOAT32X4 bias_ptr = MS_LDQ_F32(bias_data); in OutputTransform4x2Unit() local 342 m[l] = MS_ADDQ_F32(MS_ADDQ_F32(MS_ADDQ_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2Unit() 343 … 2] = MS_ADDQ_F32(MS_ADDQ_F32(MS_SUBQ_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2Unit() 397 MS_FLOAT32X4 bias_ptr = MS_LDQ_F32(bias_data); in OutputTransform4x2ReluUnit() local 405 m[l] = MS_ADDQ_F32(MS_ADDQ_F32(MS_ADDQ_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2ReluUnit() 406 … 2] = MS_ADDQ_F32(MS_ADDQ_F32(MS_SUBQ_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2ReluUnit() 465 MS_FLOAT32X4 bias_ptr = MS_LDQ_F32(bias_data); in OutputTransform4x2Relu6Unit() local 473 m[l] = MS_ADDQ_F32(MS_ADDQ_F32(MS_ADDQ_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2Relu6Unit() 474 … 2] = MS_ADDQ_F32(MS_ADDQ_F32(MS_SUBQ_F32(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2Relu6Unit() 534 MS_FLOAT32X4 bias_ptr = MS_LDQ_F32(bias_data); in OutputTransform4x3Unit() local [all …]
|
D | winograd_transform.c | 136 const float *bias_ptr = bias_data + j * C4NUM; in WinogradOutputNHWCTransform() local 138 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 148 const float *bias_ptr = bias_data + j * C8NUM; in WinogradOutputNHWCTransform() local 150 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 193 const float *bias_ptr = bias_data + j * C8NUM; in WinogradOutputNC4HW4Transform() local 195 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); in WinogradOutputNC4HW4Transform() 206 const float *bias_ptr = bias_data + j * C4NUM; in WinogradOutputNC4HW4Transform() local 208 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); in WinogradOutputNC4HW4Transform()
|
D | common_func_fp32.h | 42 void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t outp… 44 void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t outp…
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/ |
D | winograd_utils_fp16.c | 470 float16x8_t bias_ptr = vld1q_f16(bias_data); in OutputTransform4x2UnitFp16() local 478 m[l] = vaddq_f16(vaddq_f16(vaddq_f16(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2UnitFp16() 479 … m[l + 2] = vaddq_f16(vaddq_f16(vsubq_f16(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2UnitFp16() 498 float16x4_t bias_ptr = vld1_f16(bias_data); in OutputTransform4x2UnitFp16() local 506 m[l] = vadd_f16(vadd_f16(vadd_f16(t[offset], t[1 + offset]), t[2 + offset]), bias_ptr); in OutputTransform4x2UnitFp16() 507 … m[l + 2] = vadd_f16(vadd_f16(vsub_f16(t[1 + offset], t[2 + offset]), t[3 + offset]), bias_ptr); in OutputTransform4x2UnitFp16() 531 float16_t bias_ptr = bias_data[z]; in OutputTransform4x2UnitFp16() local 539 m[l] = t[offset] + t[1 + offset] + t[2 + offset] + bias_ptr; in OutputTransform4x2UnitFp16() 540 m[l + 2] = t[1 + offset] - t[2 + offset] + t[3 + offset] + bias_ptr; in OutputTransform4x2UnitFp16() 561 float16x8_t bias_ptr = vld1q_f16(bias_data); in OutputTransform4x2ReluUnitFp16() local [all …]
|
D | common_func_fp16.c | 19 void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr, in PostConvFuncCommFp16() argument 31 if (bias_ptr != NULL) { in PostConvFuncCommFp16() 32 value = value + bias_ptr[oc]; in PostConvFuncCommFp16()
|
D | matrix_fp16.c | 70 float16x8_t bias_ptr = vld1q_f16(bias); in MatrixMultiplyVecFp16() local 78 matrix_c[count] = vaddq_f16(res, bias_ptr); in MatrixMultiplyVecFp16()
|
D | winograd_transform_fp16.c | 159 const float16_t *bias_ptr = bias_data + j * C8NUM; in WinogradOutputNHWCTransformFp16() local 161 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransformFp16() 199 const float16_t *bias_ptr = bias_data + j * C8NUM; in WinogradOutputNC8HW8TransformFp16() local 201 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); in WinogradOutputNC8HW8TransformFp16()
|
D | common_func_fp16.h | 26 void PostConvFuncFp16C8(const float16_t *c8_out_ptr, float16_t *out_ptr, const float16_t *bias_ptr,
|
/third_party/mindspore/mindspore/lite/micro/coder/wrapper/int8/ |
D | matmul_int8_wrapper.c | 34 … int col, int col_align, int deep_16, int input_zp, const int *weight_zp, const int *bias_ptr, in InitInt8MatrixB() argument 46 …CalcWeightBiasSums(cur_b, deep, col, input_zp, weight_zp, bias_ptr, cur_sums, ColMajor, filter_per… in InitInt8MatrixB() 53 … CalcWeightBiasSums(cur_b, deep, col, input_zp, weight_zp, bias_ptr, cur_sums, RowMajor, false); in InitInt8MatrixB()
|
D | matmul_int8_wrapper.h | 29 … int col, int col_align, int deep_16, int input_zp, const int *weight_zp, const int *bias_ptr,
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/ |
D | conv3x3_int8.c | 465 int32x4_t bias_ptr = vld1q_s32(bias_data); in Conv3x3Int8OutputUnit() local 497 int32x4_t d00 = vaddq_s32(vshrq_n_s32(vaddq_s32(vaddq_s32(t00, t01), t02), 1), bias_ptr); in Conv3x3Int8OutputUnit() 498 int32x4_t d01 = vaddq_s32(vshrq_n_s32(vsubq_s32(vsubq_s32(t01, t02), t03), 1), bias_ptr); in Conv3x3Int8OutputUnit() 500 int32x4_t d10 = vaddq_s32(vshrq_n_s32(vaddq_s32(vaddq_s32(t10, t11), t12), 1), bias_ptr); in Conv3x3Int8OutputUnit() 501 int32x4_t d11 = vaddq_s32(vshrq_n_s32(vsubq_s32(vsubq_s32(t11, t12), t13), 1), bias_ptr); in Conv3x3Int8OutputUnit() 586 const int32_t *bias_ptr = bias_data + i; in Conv3x3Int8OutputUnit() local 618 int32_t d00 = (t00 + t01 + t02) / 2 + bias_ptr[0]; in Conv3x3Int8OutputUnit() 619 int32_t d01 = (t01 - t02 - t03) / 2 + bias_ptr[0]; in Conv3x3Int8OutputUnit() 621 int32_t d10 = (t10 + t11 + t12) / 2 + bias_ptr[0]; in Conv3x3Int8OutputUnit() 622 int32_t d11 = (t11 - t12 - t13) / 2 + bias_ptr[0]; in Conv3x3Int8OutputUnit() [all …]
|
D | conv_depthwise_int8.c | 258 const int32_t *bias_ptr = bias; in ConvDw3x3Int8Row() local 265 …ConvDw3x3Int8Block(output_ptr, buffer, weight_ptr, bias_ptr, 0, 64, 64, ih_offset, conv_param->inp… in ConvDw3x3Int8Row() 271 bias_ptr += 64; in ConvDw3x3Int8Row() 279 ConvDw3x3Int8Block(output_ptr, input_ptr, weight_ptr, bias_ptr, c, conv_param->input_channel_, in ConvDw3x3Int8Row()
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/opencl/kernel/ |
D | depthwise_conv2d.cc | 184 float16_t *bias_ptr = static_cast<float16_t *>(dst); in InitBias() local 186 bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(src)[i]); in InitBias() 189 float32_t *bias_ptr = static_cast<float32_t *>(dst); in InitBias() local 191 bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(src)[i]); in InitBias()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/ |
D | minimal_filtering_generator.c | 237 MS_FLOAT32X4 bias_ptr = MS_MOVQ_F32(0); in MatrixMultiplyVec() local 239 bias_ptr = MS_LDQ_F32(bias); in MatrixMultiplyVec() 248 matrix_c[count] = MS_ADDQ_F32(res, bias_ptr); in MatrixMultiplyVec()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/sse/ |
D | sse_common.h | 324 static inline void DoBiasBlock8(const float *bias_ptr, __m128 *dst1, __m128 *dst2, __m128 *dst3, __… in DoBiasBlock8() argument 326 __m128 bias1 = _mm_loadu_ps(bias_ptr); in DoBiasBlock8() 327 __m128 bias2 = _mm_loadu_ps(bias_ptr + C4NUM); in DoBiasBlock8()
|
/third_party/mindspore/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/ |
D | matmul_fp32_tests.cc | 91 float *bias_ptr, const std::vector<int> &a_shape, const std::vector<int> &b_shape, in MMTestInit2() argument 105 memcpy(bias_t->MutableData(), bias_ptr, sizeof(float) * bias_t->ElementsNum()); in MMTestInit2()
|