/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | winograd_avx.c | 29 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2AvxUnit() 30 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2AvxUnit() 34 …m[l] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_p… in OutputTransform4x2AvxUnit() 35 …m[l + 2] = MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset])… in OutputTransform4x2AvxUnit() 62 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2ReluAvxUnit() 63 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2ReluAvxUnit() 67 …m[l] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_p… in OutputTransform4x2ReluAvxUnit() 68 …m[l + 2] = MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset])… in OutputTransform4x2ReluAvxUnit() 98 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2Relu6AvxUnit() 99 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2Relu6AvxUnit() [all …]
|
D | exp_fp32.h | 73 …MS_FLOAT32X8 tmp = MS_MUL256_F32(decimal, (MS_ADD256_F32(param[2], MS_MUL256_F32(decimal, param[1]… in simd_exp_avx() 74 …tmp = MS_MUL256_F32(decimal, MS_ADD256_F32(param[4], MS_MUL256_F32(decimal, MS_ADD256_F32(param[3]… in simd_exp_avx() 75 …MS_FLOAT32X8 decimal_exp = MS_ADD256_F32(param[5], MS_MUL256_F32(decimal, MS_ADD256_F32(param[5], … in simd_exp_avx()
|
D | add_fp32.c | 34 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0_opt_8, vin1); in ElementOptAdd() 52 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1_opt_8); in ElementOptAdd() 136 MS_FLOAT32X8 vout = MS_MAX256_F32(MS_ADD256_F32(vin0_opt_8, vin1), zeros_8); in ElementOptAddRelu() 154 MS_FLOAT32X8 vout = MS_MAX256_F32(MS_ADD256_F32(vin0, vin1_opt_8), zeros_8); in ElementOptAddRelu() 190 …MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0_opt_8, vin1), zeros_8), bounds_… in ElementOptAddRelu6() 208 …MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0, vin1_opt_8), zeros_8), bounds_… in ElementOptAddRelu6() 239 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1); in ElementAdd() 264 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1); in ElementAddRelu() 294 MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0, vin1), zeros_8), bounds_8); in ElementAddRelu6()
|
D | instance_norm_fp32.c | 92 …MS_ADD256_F32(MS_MUL256_F32(MS_SUB256_F32(srcv8, meanv8), denov8), MS_MOV256_F32(*(beta_data + c))… in InstanceNorm() 289 mean = MS_ADD256_F32(mean, srcv); in InstanceNormNC8HW8() 290 mean1 = MS_ADD256_F32(mean1, srcv1); in InstanceNormNC8HW8() 291 squ_m = MS_ADD256_F32(squ_m, squarev); in InstanceNormNC8HW8() 292 squ_m1 = MS_ADD256_F32(squ_m1, squarev1); in InstanceNormNC8HW8() 299 … MS_ADD256_F32(MS_SUB256_F32(squ_m, MS_MUL256_F32(mean, mean)), MS_MOV256_F32(param->epsilon_)); in InstanceNormNC8HW8() 301 … MS_ADD256_F32(MS_SUB256_F32(squ_m1, MS_MUL256_F32(mean1, mean1)), MS_MOV256_F32(param->epsilon_)); in InstanceNormNC8HW8() 313 outv = MS_ADD256_F32(outv, betav); in InstanceNormNC8HW8() 314 outv1 = MS_ADD256_F32(outv1, betav1); in InstanceNormNC8HW8() 326 mean = MS_ADD256_F32(mean, srcv); in InstanceNormNC8HW8() [all …]
|
D | resize_fp32.c | 173 …MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(left, left_w_8), MS_MUL256_F32(right, righ… in InterpRow() 209 …MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(bottom, bottom_w_8), MS_MUL256_F32(top, to… in InterpCol() 333 … MS_FLOAT32X8 interp_value = MS_ADD256_F32(dst3, MS_ADD256_F32(dst2, MS_ADD256_F32(dst1, dst0))); in BicubicInterpRow() 387 … MS_FLOAT32X8 interp_value = MS_ADD256_F32(dst4, MS_ADD256_F32(dst3, MS_ADD256_F32(dst1, dst2))); in BicubicInterpCol()
|
D | activation_fp32.c | 109 … MS_DIV256_F32(MS_MOV256_F32(1.0f), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); in Sigmoid() 244 MS_ADD256_F32( in Gelu() 245 …para3, MS_TANHX8_F32(MS_MUL256_F32(MS_ADD256_F32(para1, MS_MUL256_F32(MS_MUL256_F32(para2, in), in… in Gelu()
|
D | reduce_fp32.c | 477 tmp = MS_ADD256_F32(tmp, MS_LD256_F32(inner_src + i * col_len)); in ReduceSumDim2Axis0() 508 tmp_arr_8 = MS_ADD256_F32(tmp_arr_8, src_in); in ReduceSumDim2Axis1()
|
D | pooling_fp32.c | 75 tmp_avg = MS_ADD256_F32(tmp_avg, MS_LD256_F32(src_win_ptr)); in AvgPooling()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ |
D | ms_simd_instructions.h | 95 #define MS_ADD256_F32 _mm256_add_ps macro 296 …MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(square, data0), square), dat… in MS_TANHX8_F32() 299 MS_FLOAT32X8 b = MS_ADD256_F32( in MS_TANHX8_F32() 300 …MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(data3, square), data4), squa… in MS_TANHX8_F32()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/ |
D | activation_grad.c | 121 … MS_DIV256_F32(MS_LD256_F32(src0 + i), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); in SoftplusGrad()
|