/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | instance_norm_fp32.c | 57 MS_FLOAT32X4 squarev = MS_MULQ_F32(srcv, srcv); in InstanceNorm() 103 … MS_ADDQ_F32(MS_MULQ_F32(MS_SUBQ_F32(srcv4, meanv4), denov4), MS_MOVQ_F32(*(beta_data + c))); in InstanceNorm() 131 MS_FLOAT32X4 squarev = MS_MULQ_F32(srcv, srcv), squarev1 = MS_MULQ_F32(srcv1, srcv1); in InstanceNormC4HW4ArmSse() 132 MS_FLOAT32X4 squarev2 = MS_MULQ_F32(srcv2, srcv2), squarev3 = MS_MULQ_F32(srcv3, srcv3); in InstanceNormC4HW4ArmSse() 139 …MS_FLOAT32X4 deno = MS_ADDQ_F32(MS_SUBQ_F32(squ_m, MS_MULQ_F32(mean, mean)), MS_MOVQ_F32(param->ep… in InstanceNormC4HW4ArmSse() 140 …MS_FLOAT32X4 deno1 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m1, MS_MULQ_F32(mean1, mean1)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse() 141 …MS_FLOAT32X4 deno2 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m2, MS_MULQ_F32(mean2, mean2)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse() 142 …MS_FLOAT32X4 deno3 = MS_ADDQ_F32(MS_SUBQ_F32(squ_m3, MS_MULQ_F32(mean3, mean3)), MS_MOVQ_F32(param… in InstanceNormC4HW4ArmSse() 149 …MS_FLOAT32X4 gammav = MS_MULQ_F32(MS_LDQ_F32(gamma_data + c), deno); // deno * gamma_d… in InstanceNormC4HW4ArmSse() 150 …MS_FLOAT32X4 gammav1 = MS_MULQ_F32(MS_LDQ_F32(gamma_data + c + C4NUM), deno1); // deno * gamma_d… in InstanceNormC4HW4ArmSse() [all …]
|
D | exp_fp32.h | 42 MS_FLOAT32X4 decimal = MS_SUBQ_F32(input, MS_MULQ_F32(MS_CVTQEPI32_PS(integer), param[0])); in VexpFp32() 44 MS_FLOAT32X4 tmp = MS_MULQ_F32(decimal, (MS_ADDQ_F32(param[2], MS_MULQ_F32(decimal, param[1])))); in VexpFp32() 45 …tmp = MS_MULQ_F32(decimal, MS_ADDQ_F32(param[4], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[3], tmp)))… in VexpFp32() 46 …MS_FLOAT32X4 decimal_exp = MS_ADDQ_F32(param[5], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[5], tmp))); in VexpFp32() 47 return MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp)); in VexpFp32()
|
D | power_fp32.c | 26 result = MS_MULQ_F32(result, x); in OptimizedPowerSimd() 28 x = MS_MULQ_F32(x, x); in OptimizedPowerSimd() 74 …MS_FLOAT32X4 result = PowerSimdFun_(MS_ADDQ_F32(MS_MULQ_F32(scale_4, MS_LDQ_F32(input + i)), shift… in PowerBroadCast() 91 MS_FLOAT32X4 tmp_4 = MS_ADDQ_F32(MS_MULQ_F32(scale_4, MS_LDQ_F32(input + i)), shift_4); in PowerSingle()
|
D | activation_fp32.c | 179 MS_FLOAT32X4 result = MS_MULQ_F32(src_value, sigmoid_value); in Swish() 257 MS_FLOAT32X4 res = MS_MULQ_F32( in Gelu() 258 MS_MULQ_F32(para8, in), in Gelu() 260 … MS_TANHX4_F32(MS_MULQ_F32(MS_ADDQ_F32(para5, MS_MULQ_F32(MS_MULQ_F32(para6, in), in)), in)))); in Gelu() 275 …MS_FLOAT32X4 res = MS_MULQ_F32(MS_MULQ_F32(para3, in), MS_ADDQ_F32(para2, MS_ERFX4_F32(MS_DIVQ_F32… in Gelu()
|
D | resize_fp32.c | 183 … MS_FLOAT32X4 interp_value = MS_ADDQ_F32(MS_MULQ_F32(left, left_w), MS_MULQ_F32(right, right_w)); in InterpRow() 219 … MS_FLOAT32X4 interp_value = MS_ADDQ_F32(MS_MULQ_F32(bottom, bottom_w), MS_MULQ_F32(top, top_w)); in InterpCol() 347 MS_FLOAT32X4 dst0 = MS_MULQ_F32(src0_vec, weight0_vec); in BicubicInterpRow() 348 MS_FLOAT32X4 dst1 = MS_MULQ_F32(src1_vec, weight1_vec); in BicubicInterpRow() 349 MS_FLOAT32X4 dst2 = MS_MULQ_F32(src2_vec, weight2_vec); in BicubicInterpRow() 350 MS_FLOAT32X4 dst3 = MS_MULQ_F32(src3_vec, weight3_vec); in BicubicInterpRow() 401 MS_FLOAT32X4 dst1 = MS_MULQ_F32(src0_vec, weight0_vec); in BicubicInterpCol() 402 MS_FLOAT32X4 dst2 = MS_MULQ_F32(src1_vec, weight1_vec); in BicubicInterpCol() 403 MS_FLOAT32X4 dst3 = MS_MULQ_F32(src2_vec, weight2_vec); in BicubicInterpCol() 404 MS_FLOAT32X4 dst4 = MS_MULQ_F32(src3_vec, weight3_vec); in BicubicInterpCol()
|
D | mul_fp32.c | 39 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1); in ElementMul() 66 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1); in ElementMulRelu() 96 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1), zeros), bounds); in ElementMulRelu6() 202 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0_opt, vin1); in ElementOptMul() 222 MS_FLOAT32X4 vout = MS_MULQ_F32(vin0, vin1_opt); in ElementOptMul() 250 MS_FLOAT32X4 vout = MS_MAXQ_F32(MS_MULQ_F32(vin0_opt, vin1), zeros); in ElementOptMulRelu() 272 MS_FLOAT32X4 vout = MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1_opt), zeros); in ElementOptMulRelu() 302 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0_opt, vin1), zeros), bounds); in ElementOptMulRelu6() 326 MS_FLOAT32X4 vout = MS_MINQ_F32(MS_MAXQ_F32(MS_MULQ_F32(vin0, vin1_opt), zeros), bounds); in ElementOptMulRelu6()
|
D | exp_fp32.c | 56 simd_exp(MS_MULQ_F32(MS_LDQ_F32(src + i), scale), dst + i); in ExpFusionFp32() 70 MS_STQ_F32(dst + i, MS_MULQ_F32(MS_LDQ_F32(dst + i), scale)); in ExpFusionFp32()
|
D | prelu_fp32.c | 120 MS_FLOAT32X4 mul = MS_MULQ_F32(in, s); in PRelu()
|
D | conv_depthwise_fp32.c | 601 MS_FLOAT32X4 acc0 = MS_MULQ_F32(MS_LDQ_F32(line0), g00); in ConvDw3x3Line() 602 MS_FLOAT32X4 acc1 = MS_MULQ_F32(MS_LDQ_F32(line0 + 4), g01); in ConvDw3x3Line() 603 MS_FLOAT32X4 acc2 = MS_MULQ_F32(MS_LDQ_F32(line0 + 8), g02); in ConvDw3x3Line() 604 MS_FLOAT32X4 acc3 = MS_MULQ_F32(MS_LDQ_F32(line0 + 12), g03); in ConvDw3x3Line() 640 MS_FLOAT32X4 acc0 = MS_MULQ_F32(MS_LDQ_F32(line0), g00); in ConvDw3x3Line() 641 MS_FLOAT32X4 acc1 = MS_MULQ_F32(MS_LDQ_F32(line0 + 4), g01); in ConvDw3x3Line() 642 MS_FLOAT32X4 acc2 = MS_MULQ_F32(MS_LDQ_F32(line0 + 8), g02); in ConvDw3x3Line()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ |
D | ms_simd_instructions.h | 63 #define MS_MULQ_F32(src1, src2) vmulq_f32(src1, src2) macro 140 #define MS_MULQ_F32(src1, src2) _mm_mul_ps(src1, src2) macro 195 MS_FLOAT32X4 square = MS_MULQ_F32(src, src); in MS_TANHX4_F32() 196 MS_FLOAT32X4 a = MS_MULQ_F32( in MS_TANHX4_F32() 197 …MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(square, data0), square), data1), squar… in MS_TANHX4_F32() 199 …MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(data3, square), data4), square), data5… in MS_TANHX4_F32()
|