Home
last modified time | relevance | path

Searched refs:MS_F32X4_GETI (Results 1 – 7 of 7) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/sse/
DTiledC4MatMulFp32.c54 __m128 dst1 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src1, 0))); in TiledC4MatmulFp32()
55 __m128 dst2 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src2, 0))); in TiledC4MatmulFp32()
56 __m128 dst3 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src3, 0))); in TiledC4MatmulFp32()
57 __m128 dst4 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src4, 0))); in TiledC4MatmulFp32()
59 TiledC4MatmulFp32_Transfer(&dst1, &dst2, &dst3, &dst4, weight_data[j], MS_F32X4_GETI(src1, j), in TiledC4MatmulFp32()
60MS_F32X4_GETI(src2, j), MS_F32X4_GETI(src3, j), MS_F32X4_GETI(src4, j)); in TiledC4MatmulFp32()
64 __m128 dst5 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src1, 0))); in TiledC4MatmulFp32()
65 __m128 dst6 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src2, 0))); in TiledC4MatmulFp32()
66 __m128 dst7 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src3, 0))); in TiledC4MatmulFp32()
67 __m128 dst8 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src4, 0))); in TiledC4MatmulFp32()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/
Dms_simd_instructions.h23 #define MS_F32X4_GETI(src, i) src[i] macro
29 #define MS_F32X4_GETI(src, i) src.m128_f32[i] macro
32 #define MS_F32X4_GETI(src, i) src[i] macro
159 MS_F32X4_GETI(dst, 0) = sqrtf(MS_F32X4_GETI(src, 0)); in MS_SQRTFX4_F32()
160 MS_F32X4_GETI(dst, 1) = sqrtf(MS_F32X4_GETI(src, 1)); in MS_SQRTFX4_F32()
161 MS_F32X4_GETI(dst, 2) = sqrtf(MS_F32X4_GETI(src, 2)); in MS_SQRTFX4_F32()
162 MS_F32X4_GETI(dst, 3) = sqrtf(MS_F32X4_GETI(src, 3)); in MS_SQRTFX4_F32()
206 MS_F32X4_GETI(dst, 0) = erff(MS_F32X4_GETI(src, 0)); in MS_ERFX4_F32()
207 MS_F32X4_GETI(dst, 1) = erff(MS_F32X4_GETI(src, 1)); in MS_ERFX4_F32()
208 MS_F32X4_GETI(dst, 2) = erff(MS_F32X4_GETI(src, 2)); in MS_ERFX4_F32()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dpower_fp32.h42 MS_F32X4_GETI(result, i) = powf(MS_F32X4_GETI(x, i), *exponent); in StdPowerSimd()
Dwinograd_utils.c353 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2Unit()
418 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2ReluUnit()
488 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2Relu6Unit()
557 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3Unit()
629 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3ReluUnit()
707 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3Relu6Unit()
784 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2Unit()
859 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2ReluUnit()
939 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2Relu6Unit()
1013 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x3Unit()
[all …]
Dinstance_norm_fp32.c48 mean += MS_F32X4_GETI(src128, i); in InstanceNorm()
49 squ_m += MS_F32X4_GETI(square128, i); in InstanceNorm()
63 mean += MS_F32X4_GETI(srcv, i); in InstanceNorm()
64 squ_m += MS_F32X4_GETI(squarev, i); in InstanceNorm()
Dpower_fp32.c94 output[i + j] = PowerScalarFun_(MS_F32X4_GETI(tmp_4, j), exponent + i + j); in PowerSingle()
Dconv_depthwise_fp32.c633 cur_dst[i] = MS_F32X4_GETI(res0, i); in ConvDw3x3Line()
634 cur_dst[ori_channel + i] = MS_F32X4_GETI(res1, i); in ConvDw3x3Line()
664 cur_dst[i] = MS_F32X4_GETI(res0, i); in ConvDw3x3Line()