Home
last modified time | relevance | path

Searched refs:MS_FLOAT32X8 (Results 1 – 13 of 13) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dwinograd_avx.c22 MS_FLOAT32X8 src[16]; in OutputTransform4x2AvxUnit()
23 MS_FLOAT32X8 t[8]; in OutputTransform4x2AvxUnit()
24 MS_FLOAT32X8 m[4]; in OutputTransform4x2AvxUnit()
26 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x2AvxUnit()
54 MS_FLOAT32X8 src[16]; in OutputTransform4x2ReluAvxUnit()
55 MS_FLOAT32X8 t[8]; in OutputTransform4x2ReluAvxUnit()
56 MS_FLOAT32X8 m[4]; in OutputTransform4x2ReluAvxUnit()
57 MS_FLOAT32X8 zero = MS_MOV256_F32(0); in OutputTransform4x2ReluAvxUnit()
59 MS_FLOAT32X8 bias_ptr = MS_LD256_F32(bias_data); in OutputTransform4x2ReluAvxUnit()
89 MS_FLOAT32X8 src[16]; in OutputTransform4x2Relu6AvxUnit()
[all …]
Dscale_fp32.c29 MS_FLOAT32X8 scale_8 = MS_MOV256_F32(scale[i]); in ScaleInner()
30 MS_FLOAT32X8 offset_8 = MS_MOV256_F32(offset[i]); in ScaleInner()
33 MS_FLOAT32X8 data = MS_LD256_F32(in_data + in_offset); in ScaleInner()
34 MS_FLOAT32X8 result = MS_MLA256_F32(offset_8, data, scale_8); in ScaleInner()
64 MS_FLOAT32X8 scale_8 = MS_LD256_F32(scale + index); in ScaleAxis()
65 MS_FLOAT32X8 offset_8 = MS_LD256_F32(offset + index); in ScaleAxis()
66 MS_FLOAT32X8 data = MS_LD256_F32(in_data + in_offset); in ScaleAxis()
67 MS_FLOAT32X8 result = MS_MLA256_F32(offset_8, data, scale_8); in ScaleAxis()
108 MS_FLOAT32X8 zeros_8 = {0, 0, 0, 0, 0, 0, 0, 0}; in ScaleInnerRelu()
119 MS_FLOAT32X8 scale_8 = MS_MOV256_F32(scale[i]); in ScaleInnerRelu()
[all …]
Dadd_fp32.c22 MS_FLOAT32X8 vin0_opt_8 = MS_MOV256_F32(in0[0]); in ElementOptAdd()
23 MS_FLOAT32X8 vin1_opt_8 = MS_MOV256_F32(in1[0]); in ElementOptAdd()
33 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementOptAdd()
34 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0_opt_8, vin1); in ElementOptAdd()
51 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementOptAdd()
52 MS_FLOAT32X8 vout = MS_ADD256_F32(vin0, vin1_opt_8); in ElementOptAdd()
122 MS_FLOAT32X8 vin0_opt_8 = MS_MOV256_F32(in0[0]); in ElementOptAddRelu()
123 MS_FLOAT32X8 vin1_opt_8 = MS_MOV256_F32(in1[0]); in ElementOptAddRelu()
124 MS_FLOAT32X8 zeros_8 = MS_MOV256_F32(0.0f); in ElementOptAddRelu()
135 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementOptAddRelu()
[all …]
Dmul_fp32.c29 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementMul()
30 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementMul()
31 MS_FLOAT32X8 vout = MS_MUL256_F32(vin0, vin1); in ElementMul()
52 MS_FLOAT32X8 zeros_8 = MS_MOV256_F32(0.0f); in ElementMulRelu()
54 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementMulRelu()
55 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementMulRelu()
56 MS_FLOAT32X8 vout = MS_MUL256_F32(vin0, vin1); in ElementMulRelu()
81 MS_FLOAT32X8 zeros_8 = MS_MOV256_F32(0.0f); in ElementMulRelu6()
82 MS_FLOAT32X8 bounds_8 = MS_MOV256_F32(6.0f); in ElementMulRelu6()
84 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementMulRelu6()
[all …]
Dresize_fp32.c168 MS_FLOAT32X8 left_w_8 = MS_MOV256_F32(x_left_weights[w]); in InterpRow()
169 MS_FLOAT32X8 right_w_8 = MS_MOV256_F32(1.0f - x_left_weights[w]); in InterpRow()
171 MS_FLOAT32X8 left = MS_LD256_F32(src_line + x_lefts[w] * in_c + c); in InterpRow()
172 MS_FLOAT32X8 right = MS_LD256_F32(src_line + x_rights[w] * in_c + c); in InterpRow()
173MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(left, left_w_8), MS_MUL256_F32(right, righ… in InterpRow()
204 MS_FLOAT32X8 bottom_w_8 = MS_MOV256_F32(y_bottom_weight); in InterpCol()
205 MS_FLOAT32X8 top_w_8 = MS_MOV256_F32(1.0f - y_bottom_weight); in InterpCol()
207 MS_FLOAT32X8 bottom = MS_LD256_F32(bottom_line + w * in_c + c); in InterpCol()
208 MS_FLOAT32X8 top = MS_LD256_F32(top_line + w * in_c + c); in InterpCol()
209MS_FLOAT32X8 interp_value = MS_ADD256_F32(MS_MUL256_F32(bottom, bottom_w_8), MS_MUL256_F32(top, to… in InterpCol()
[all …]
Dactivation_fp32.c24 MS_FLOAT32X8 zero_8 = MS_MOV256_F32(0.0f); in Fp32Relu()
46 MS_FLOAT32X8 zero_8 = MS_MOV256_F32(0.0f); in Fp32Relu6()
47 MS_FLOAT32X8 six_8 = MS_MOV256_F32(6.0f); in Fp32Relu6()
49 MS_FLOAT32X8 dst_tmp = MS_MAX256_F32(MS_LD256_F32(src + i), zero_8); in Fp32Relu6()
78 MS_FLOAT32X8 src_tmp = MS_LD256_F32(src + i); in LRelu()
79 MS_FLOAT32X8 mul_tmp = MS_MUL256_N_F32(src_tmp, alpha); in LRelu()
80 MS_FLOAT32X8 mask = MS_CMP256_F32(src_tmp, MS_MOV256_F32(0.0f), 30); in LRelu()
143 MS_FLOAT32X8 input = MS_LD256_F32(src + i); in Tanh()
168 MS_FLOAT32X8 src_value = MS_LD256_F32(src + index); in Swish()
169 MS_FLOAT32X8 sigmoid_value = MS_LD256_F32(dst + index); in Swish()
[all …]
Dexp_fp32.h59 static inline void simd_exp_avx(MS_FLOAT32X8 input, float *dst) { in simd_exp_avx()
60 static MS_FLOAT32X8 maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f}; in simd_exp_avx()
61 static MS_FLOAT32X8 minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f}; in simd_exp_avx()
62 static MS_FLOAT32X8 param[] = { in simd_exp_avx()
71 MS_FLOAT32X8 decimal = MS_SUB256_F32(input, MS_MUL256_F32(MS_CVT256EPI32_PS(integer), param[0])); in simd_exp_avx()
73MS_FLOAT32X8 tmp = MS_MUL256_F32(decimal, (MS_ADD256_F32(param[2], MS_MUL256_F32(decimal, param[1]… in simd_exp_avx()
75MS_FLOAT32X8 decimal_exp = MS_ADD256_F32(param[5], MS_MUL256_F32(decimal, MS_ADD256_F32(param[5], … in simd_exp_avx()
Dinstance_norm_fp32.c87 MS_FLOAT32X8 meanv8 = MS_MOV256_F32(mean); in InstanceNorm()
88 MS_FLOAT32X8 denov8 = MS_MOV256_F32(deno); in InstanceNorm()
90 MS_FLOAT32X8 srcv8 = MS_LD256_F32(src + index); in InstanceNorm()
91 MS_FLOAT32X8 dstv8 = in InstanceNorm()
275 MS_FLOAT32X8 hw_planev = MS_MOV256_F32((float)(hw_plane)); in InstanceNormNC8HW8()
284 MS_FLOAT32X8 mean = MS_MOV256_F32(0.0f), mean1 = MS_MOV256_F32(0.0f); in InstanceNormNC8HW8()
285 MS_FLOAT32X8 squ_m = MS_MOV256_F32(0.0f), squ_m1 = MS_MOV256_F32(0.0f); in InstanceNormNC8HW8()
287MS_FLOAT32X8 srcv = MS_LD256_F32(src + index * C8NUM), srcv1 = MS_LD256_F32(src1 + index * C8NUM); in InstanceNormNC8HW8()
288 MS_FLOAT32X8 squarev = MS_MUL256_F32(srcv, srcv), squarev1 = MS_MUL256_F32(srcv1, srcv1); in InstanceNormNC8HW8()
298 MS_FLOAT32X8 deno = in InstanceNormNC8HW8()
[all …]
Dsub_fp32.c20 MS_FLOAT32X8 vin0_opt_8 = MS_MOV256_F32(in0[0]); in ElementOptSub()
21 MS_FLOAT32X8 vin1_opt_8 = MS_MOV256_F32(in1[0]); in ElementOptSub()
31 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementOptSub()
32 MS_FLOAT32X8 vout = MS_SUB256_F32(vin0_opt_8, vin1); in ElementOptSub()
49 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementOptSub()
50 MS_FLOAT32X8 vout = MS_SUB256_F32(vin0, vin1_opt_8); in ElementOptSub()
179 MS_FLOAT32X8 vin0 = MS_LD256_F32(in0 + index); in ElementSub()
180 MS_FLOAT32X8 vin1 = MS_LD256_F32(in1 + index); in ElementSub()
181 MS_FLOAT32X8 vout = MS_SUB256_F32(vin0, vin1); in ElementSub()
Dpooling_fp32.c36 MS_FLOAT32X8 min_value_8 = MS_MOV256_F32(minf); in AvgPooling()
37 MS_FLOAT32X8 max_value_8 = MS_MOV256_F32(maxf); in AvgPooling()
70 MS_FLOAT32X8 tmp_avg = MS_MOV256_F32(0); in AvgPooling()
151 MS_FLOAT32X8 min_value_8 = MS_MOV256_F32(minf); in MaxPooling()
152 MS_FLOAT32X8 max_value_8 = MS_MOV256_F32(maxf); in MaxPooling()
185 MS_FLOAT32X8 tmp_max = MS_MOV256_F32(-FLT_MAX); in MaxPooling()
Dprelu_fp32.c141 MS_FLOAT32X8 src_tmp = MS_LD256_F32(input + i); in PReluShareChannel()
142 MS_FLOAT32X8 mul_tmp = MS_MUL256_N_F32(src_tmp, slope); in PReluShareChannel()
143 MS_FLOAT32X8 mask = MS_CMP256_F32(src_tmp, MS_MOV256_F32(0.0f), mask_offset); in PReluShareChannel()
Dreduce_fp32.c473 MS_FLOAT32X8 tmp = {0, 0, 0, 0, 0, 0, 0, 0}; in ReduceSumDim2Axis0()
505 MS_FLOAT32X8 tmp_arr_8 = MS_MOV256_F32(tmp_arr[0]); in ReduceSumDim2Axis1()
507 MS_FLOAT32X8 src_in = MS_LD256_F32(src_data + k); in ReduceSumDim2Axis1()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/
Dms_simd_instructions.h91 #define MS_FLOAT32X8 __m256 macro
215 static inline MS_FLOAT32X8 MS_SQRTFX8_F32(MS_FLOAT32X8 src) { in MS_SQRTFX8_F32()
216 MS_FLOAT32X8 dst; in MS_SQRTFX8_F32()
229 MS_FLOAT32X8 src##1 = MS_LD256_F32(input_ptr + 0 * num); \
230 MS_FLOAT32X8 src##2 = MS_LD256_F32(input_ptr + 1 * num); \
231 MS_FLOAT32X8 src##3 = MS_LD256_F32(input_ptr + 2 * num); \
232 MS_FLOAT32X8 src##4 = MS_LD256_F32(input_ptr + 3 * num); \
233 MS_FLOAT32X8 src##5 = MS_LD256_F32(input_ptr + 4 * num); \
234 MS_FLOAT32X8 src##6 = MS_LD256_F32(input_ptr + 5 * num); \
235 MS_FLOAT32X8 src##7 = MS_LD256_F32(input_ptr + 6 * num); \
[all …]