Home
last modified time | relevance | path

Searched refs:MS_MULQ_F32 (Results 1 – 10 of 10) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dinstance_norm_fp32.c57 MS_FLOAT32X4 squarev = MS_MULQ_F32(srcv, srcv); in InstanceNorm()
103 … MS_ADDQ_F32(MS_MULQ_F32(MS_SUBQ_F32(srcv4, meanv4), denov4), MS_MOVQ_F32(*(beta_data + c))); in InstanceNorm()
131 MS_FLOAT32X4 squarev = MS_MULQ_F32(srcv, srcv), squarev1 = MS_MULQ_F32(srcv1, srcv1); in InstanceNormC4HW4ArmSse()
132 MS_FLOAT32X4 squarev2 = MS_MULQ_F32(srcv2, srcv2), squarev3 = MS_MULQ_F32(srcv3, srcv3); in InstanceNormC4HW4ArmSse()
139 …MS_FLOAT32X4 deno = MS_ADDQ_F32(MS_SUBQ_F32(squ_m, MS_MULQ_F32(mean, mean)), MS_MOVQ_F32(param->ep… in InstanceNormC4HW4ArmSse()
140 …MS_FLOAT32X4 deno1 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m1, MS_MULQ_F32(mean1, mean1)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse()
141 …MS_FLOAT32X4 deno2 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m2, MS_MULQ_F32(mean2, mean2)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse()
142 …MS_FLOAT32X4 deno3 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m3, MS_MULQ_F32(mean3, mean3)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse()
149 …MS_FLOAT32X4 gammav = MS_MULQ_F32(MS_LDQ_F32(gamma_data + c), deno); // deno * gamma_d… in InstanceNormC4HW4ArmSse()
150 …MS_FLOAT32X4 gammav1 = MS_MULQ_F32(MS_LDQ_F32(gamma_data + c + C4NUM), deno1); // deno * gamma_d… in InstanceNormC4HW4ArmSse()
[all …]
Dexp_fp32.h42 MS_FLOAT32X4 decimal = MS_SUBQ_F32(input, MS_MULQ_F32(MS_CVTQEPI32_PS(integer), param[0])); in VexpFp32()
44 MS_FLOAT32X4 tmp = MS_MULQ_F32(decimal, (MS_ADDQ_F32(param[2], MS_MULQ_F32(decimal, param[1])))); in VexpFp32()
45 …tmp = MS_MULQ_F32(decimal, MS_ADDQ_F32(param[4], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[3], tmp)))… in VexpFp32()
46 …MS_FLOAT32X4 decimal_exp = MS_ADDQ_F32(param[5], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[5], tmp))); in VexpFp32()
47 return MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp)); in VexpFp32()
Dpower_fp32.c26 result = MS_MULQ_F32(result, x); in OptimizedPowerSimd()
28 x = MS_MULQ_F32(x, x); in OptimizedPowerSimd()
74 …MS_FLOAT32X4 result = PowerSimdFun_(MS_ADDQ_F32(MS_MULQ_F32(scale_4, MS_LDQ_F32(input + i)), shift… in PowerBroadCast()
91 MS_FLOAT32X4 tmp_4 = MS_ADDQ_F32(MS_MULQ_F32(scale_4, MS_LDQ_F32(input + i)), shift_4); in PowerSingle()
Dactivation_fp32.c179 MS_FLOAT32X4 result = MS_MULQ_F32(src_value, sigmoid_value); in Swish()
257 MS_FLOAT32X4 res = MS_MULQ_F32( in Gelu()
258 MS_MULQ_F32(para8, in), in Gelu()
260 … MS_TANHX4_F32(MS_MULQ_F32(MS_ADDQ_F32(para5, MS_MULQ_F32(MS_MULQ_F32(para6, in), in)), in)))); in Gelu()
275 …MS_FLOAT32X4 res = MS_MULQ_F32(MS_MULQ_F32(para3, in), MS_ADDQ_F32(para2, MS_ERFX4_F32(MS_DIVQ_F32… in Gelu()
Dresize_fp32.c183 … MS_FLOAT32X4 interp_value = MS_ADDQ_F32(MS_MULQ_F32(left, left_w), MS_MULQ_F32(right, right_w)); in InterpRow()
219 … MS_FLOAT32X4 interp_value = MS_ADDQ_F32(MS_MULQ_F32(bottom, bottom_w), MS_MULQ_F32(top, top_w)); in InterpCol()
347 MS_FLOAT32X4 dst0 = MS_MULQ_F32(src0_vec, weight0_vec); in BicubicInterpRow()
348 MS_FLOAT32X4 dst1 = MS_MULQ_F32(src1_vec, weight1_vec); in BicubicInterpRow()
349 MS_FLOAT32X4 dst2 = MS_MULQ_F32(src2_vec, weight2_vec); in BicubicInterpRow()
350 MS_FLOAT32X4 dst3 = MS_MULQ_F32(src3_vec, weight3_vec); in BicubicInterpRow()
401 MS_FLOAT32X4 dst1 = MS_MULQ_F32(src0_vec, weight0_vec); in BicubicInterpCol()
402 MS_FLOAT32X4 dst2 = MS_MULQ_F32(src1_vec, weight1_vec); in BicubicInterpCol()
403 MS_FLOAT32X4 dst3 = MS_MULQ_F32(src2_vec, weight2_vec); in BicubicInterpCol()
404 MS_FLOAT32X4 dst4 = MS_MULQ_F32(src3_vec, weight3_vec); in BicubicInterpCol()
Dmul_fp32.c39 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1); in ElementMul()
66 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1); in ElementMulRelu()
96 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1), zeros), bounds); in ElementMulRelu6()
202 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0_opt, vin1); in ElementOptMul()
222 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1_opt); in ElementOptMul()
250 MS_FLOAT32X4 vout = MS_MAXQ_F32(MS_MULQ_F32(vin0_opt, vin1), zeros); in ElementOptMulRelu()
272 MS_FLOAT32X4 vout = MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1_opt), zeros); in ElementOptMulRelu()
302 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0_opt, vin1), zeros), bounds); in ElementOptMulRelu6()
326 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1_opt), zeros), bounds); in ElementOptMulRelu6()
Dexp_fp32.c56 simd_exp(MS_MULQ_F32(MS_LDQ_F32(src + i), scale), dst + i); in ExpFusionFp32()
70 MS_STQ_F32(dst + i, MS_MULQ_F32(MS_LDQ_F32(dst + i), scale)); in ExpFusionFp32()
Dprelu_fp32.c120 MS_FLOAT32X4 mul = MS_MULQ_F32(in, s); in PRelu()
Dconv_depthwise_fp32.c601 MS_FLOAT32X4 acc0 = MS_MULQ_F32(MS_LDQ_F32(line0), g00); in ConvDw3x3Line()
602 MS_FLOAT32X4 acc1 = MS_MULQ_F32(MS_LDQ_F32(line0 + 4), g01); in ConvDw3x3Line()
603 MS_FLOAT32X4 acc2 = MS_MULQ_F32(MS_LDQ_F32(line0 + 8), g02); in ConvDw3x3Line()
604 MS_FLOAT32X4 acc3 = MS_MULQ_F32(MS_LDQ_F32(line0 + 12), g03); in ConvDw3x3Line()
640 MS_FLOAT32X4 acc0 = MS_MULQ_F32(MS_LDQ_F32(line0), g00); in ConvDw3x3Line()
641 MS_FLOAT32X4 acc1 = MS_MULQ_F32(MS_LDQ_F32(line0 + 4), g01); in ConvDw3x3Line()
642 MS_FLOAT32X4 acc2 = MS_MULQ_F32(MS_LDQ_F32(line0 + 8), g02); in ConvDw3x3Line()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/
Dms_simd_instructions.h63 #define MS_MULQ_F32(src1, src2) vmulq_f32(src1, src2) macro
140 #define MS_MULQ_F32(src1, src2) _mm_mul_ps(src1, src2) macro
195 MS_FLOAT32X4 square = MS_MULQ_F32(src, src); in MS_TANHX4_F32()
196 MS_FLOAT32X4 a = MS_MULQ_F32( in MS_TANHX4_F32()
197 …MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(square, data0), square), data1), squar… in MS_TANHX4_F32()
199MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(data3, square), data4), square), data5… in MS_TANHX4_F32()