Home
last modified time | relevance | path

Searched refs:MS_ADD256_F32 (Results 1 – 10 of 10) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dwinograd_avx.c29 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2AvxUnit()
30 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2AvxUnit()
34 …m[l] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_p… in OutputTransform4x2AvxUnit()
35 …m[l + 2] = MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset])… in OutputTransform4x2AvxUnit()
62 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2ReluAvxUnit()
63 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2ReluAvxUnit()
67 …m[l] = MS_ADD256_F32(MS_ADD256_F32(MS_ADD256_F32(t[offset], t[1 + offset]), t[2 + offset]), bias_p… in OutputTransform4x2ReluAvxUnit()
68 …m[l + 2] = MS_ADD256_F32(MS_ADD256_F32(MS_SUB256_F32(t[1 + offset], t[2 + offset]), t[3 + offset])… in OutputTransform4x2ReluAvxUnit()
98 t[l] = MS_ADD256_F32(MS_ADD256_F32(src[offset], src[1 + offset]), src[2 + offset]); in OutputTransform4x2Relu6AvxUnit()
99 t[l + 4] = MS_ADD256_F32(MS_SUB256_F32(src[1 + offset], src[2 + offset]), src[3 + offset]); in OutputTransform4x2Relu6AvxUnit()
[all …]
Dexp_fp32.h73 …MS_FLOAT32X8 tmp = MS_MUL256_F32(decimal, (MS_ADD256_F32(param[2], MS_MUL256_F32(decimal, param[1]… in simd_exp_avx()
74 …tmp = MS_MUL256_F32(decimal, MS_ADD256_F32(param[4], MS_MUL256_F32(decimal, MS_ADD256_F32(param[3]… in simd_exp_avx()
75 …MS_FLOAT32X8 decimal_exp = MS_ADD256_F32(param[5], MS_MUL256_F32(decimal, MS_ADD256_F32(param[5], … in simd_exp_avx()
Dadd_fp32.c34 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0_opt_8, vin1); in ElementOptAdd()
52 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1_opt_8); in ElementOptAdd()
136 MS_FLOAT32X8 vout = MS_MAX256_F32(MS_ADD256_F32(vin0_opt_8, vin1), zeros_8); in ElementOptAddRelu()
154 MS_FLOAT32X8 vout = MS_MAX256_F32(MS_ADD256_F32(vin0, vin1_opt_8), zeros_8); in ElementOptAddRelu()
190 …MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0_opt_8, vin1), zeros_8), bounds_… in ElementOptAddRelu6()
208 …MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0, vin1_opt_8), zeros_8), bounds_… in ElementOptAddRelu6()
239 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1); in ElementAdd()
264 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1); in ElementAddRelu()
294 MS_FLOAT32X8 vout = MS_MIN256_F32(MS_MAX256_F32(MS_ADD256_F32(vin0, vin1), zeros_8), bounds_8); in ElementAddRelu6()
Dinstance_norm_fp32.c92MS_ADD256_F32(MS_MUL256_F32(MS_SUB256_F32(srcv8, meanv8), denov8), MS_MOV256_F32(*(beta_data + c))… in InstanceNorm()
289 mean = MS_ADD256_F32(mean, srcv); in InstanceNormNC8HW8()
290 mean1 = MS_ADD256_F32(mean1, srcv1); in InstanceNormNC8HW8()
291 squ_m = MS_ADD256_F32(squ_m, squarev); in InstanceNormNC8HW8()
292 squ_m1 = MS_ADD256_F32(squ_m1, squarev1); in InstanceNormNC8HW8()
299MS_ADD256_F32(MS_SUB256_F32(squ_m, MS_MUL256_F32(mean, mean)), MS_MOV256_F32(param->epsilon_)); in InstanceNormNC8HW8()
301MS_ADD256_F32(MS_SUB256_F32(squ_m1, MS_MUL256_F32(mean1, mean1)), MS_MOV256_F32(param->epsilon_)); in InstanceNormNC8HW8()
313 outv = MS_ADD256_F32(outv, betav); in InstanceNormNC8HW8()
314 outv1 = MS_ADD256_F32(outv1, betav1); in InstanceNormNC8HW8()
326 mean = MS_ADD256_F32(mean, srcv); in InstanceNormNC8HW8()
[all …]
Dresize_fp32.c173 …MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(left, left_w_8), MS_MUL256_F32(right, righ… in InterpRow()
209 …MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(bottom, bottom_w_8), MS_MUL256_F32(top, to… in InterpCol()
333 … MS_FLOAT32X8 interp_value = MS_ADD256_F32(dst3, MS_ADD256_F32(dst2, MS_ADD256_F32(dst1, dst0))); in BicubicInterpRow()
387 … MS_FLOAT32X8 interp_value = MS_ADD256_F32(dst4, MS_ADD256_F32(dst3, MS_ADD256_F32(dst1, dst2))); in BicubicInterpCol()
Dactivation_fp32.c109 … MS_DIV256_F32(MS_MOV256_F32(1.0f), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); in Sigmoid()
244 MS_ADD256_F32( in Gelu()
245 …para3, MS_TANHX8_F32(MS_MUL256_F32(MS_ADD256_F32(para1, MS_MUL256_F32(MS_MUL256_F32(para2, in), in… in Gelu()
Dreduce_fp32.c477 tmp = MS_ADD256_F32(tmp, MS_LD256_F32(inner_src + i * col_len)); in ReduceSumDim2Axis0()
508 tmp_arr_8 = MS_ADD256_F32(tmp_arr_8, src_in); in ReduceSumDim2Axis1()
Dpooling_fp32.c75 tmp_avg = MS_ADD256_F32(tmp_avg, MS_LD256_F32(src_win_ptr)); in AvgPooling()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/
Dms_simd_instructions.h95 #define MS_ADD256_F32 _mm256_add_ps macro
296MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(square, data0), square), dat… in MS_TANHX8_F32()
299 MS_FLOAT32X8 b = MS_ADD256_F32( in MS_TANHX8_F32()
300 …MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(MS_ADD256_F32(MS_MUL256_F32(data3, square), data4), squa… in MS_TANHX8_F32()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/
Dactivation_grad.c121 … MS_DIV256_F32(MS_LD256_F32(src0 + i), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); in SoftplusGrad()