Searched refs:MS_F32X4_GETI (Results 1 – 7 of 7) sorted by relevance
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/sse/ |
D | TiledC4MatMulFp32.c | 54 __m128 dst1 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src1, 0))); in TiledC4MatmulFp32() 55 __m128 dst2 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src2, 0))); in TiledC4MatmulFp32() 56 __m128 dst3 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src3, 0))); in TiledC4MatmulFp32() 57 __m128 dst4 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src4, 0))); in TiledC4MatmulFp32() 59 TiledC4MatmulFp32_Transfer(&dst1, &dst2, &dst3, &dst4, weight_data[j], MS_F32X4_GETI(src1, j), in TiledC4MatmulFp32() 60 … MS_F32X4_GETI(src2, j), MS_F32X4_GETI(src3, j), MS_F32X4_GETI(src4, j)); in TiledC4MatmulFp32() 64 __m128 dst5 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src1, 0))); in TiledC4MatmulFp32() 65 __m128 dst6 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src2, 0))); in TiledC4MatmulFp32() 66 __m128 dst7 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src3, 0))); in TiledC4MatmulFp32() 67 __m128 dst8 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src4, 0))); in TiledC4MatmulFp32() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ |
D | ms_simd_instructions.h | 23 #define MS_F32X4_GETI(src, i) src[i] macro 29 #define MS_F32X4_GETI(src, i) src.m128_f32[i] macro 32 #define MS_F32X4_GETI(src, i) src[i] macro 159 MS_F32X4_GETI(dst, 0) = sqrtf(MS_F32X4_GETI(src, 0)); in MS_SQRTFX4_F32() 160 MS_F32X4_GETI(dst, 1) = sqrtf(MS_F32X4_GETI(src, 1)); in MS_SQRTFX4_F32() 161 MS_F32X4_GETI(dst, 2) = sqrtf(MS_F32X4_GETI(src, 2)); in MS_SQRTFX4_F32() 162 MS_F32X4_GETI(dst, 3) = sqrtf(MS_F32X4_GETI(src, 3)); in MS_SQRTFX4_F32() 206 MS_F32X4_GETI(dst, 0) = erff(MS_F32X4_GETI(src, 0)); in MS_ERFX4_F32() 207 MS_F32X4_GETI(dst, 1) = erff(MS_F32X4_GETI(src, 1)); in MS_ERFX4_F32() 208 MS_F32X4_GETI(dst, 2) = erff(MS_F32X4_GETI(src, 2)); in MS_ERFX4_F32() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | power_fp32.h | 42 MS_F32X4_GETI(result, i) = powf(MS_F32X4_GETI(x, i), *exponent); in StdPowerSimd()
|
D | winograd_utils.c | 353 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2Unit() 418 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2ReluUnit() 488 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x2Relu6Unit() 557 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3Unit() 629 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3ReluUnit() 707 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform4x3Relu6Unit() 784 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2Unit() 859 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2ReluUnit() 939 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x2Relu6Unit() 1013 dst_data[i + dst_k_offset + k * out_c] = MS_F32X4_GETI(m[k + m_k_offset], i); in OutputTransform6x3Unit() [all …]
|
D | instance_norm_fp32.c | 48 mean += MS_F32X4_GETI(src128, i); in InstanceNorm() 49 squ_m += MS_F32X4_GETI(square128, i); in InstanceNorm() 63 mean += MS_F32X4_GETI(srcv, i); in InstanceNorm() 64 squ_m += MS_F32X4_GETI(squarev, i); in InstanceNorm()
|
D | power_fp32.c | 94 output[i + j] = PowerScalarFun_(MS_F32X4_GETI(tmp_4, j), exponent + i + j); in PowerSingle()
|
D | conv_depthwise_fp32.c | 633 cur_dst[i] = MS_F32X4_GETI(res0, i); in ConvDw3x3Line() 634 cur_dst[ori_channel + i] = MS_F32X4_GETI(res1, i); in ConvDw3x3Line() 664 cur_dst[i] = MS_F32X4_GETI(res0, i); in ConvDw3x3Line()
|