Home
Sort by: last modified time | relevance | path

Search results for refs:dst7 (Results 1 – 22 of 22), sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/sse/
Dsse_common.h89 … __m128 *dst6, __m128 *dst7, __m128 *dst8, int stride, int extra_stride, int r) { in WriteCol1() argument
101 _mm_store_ss(*dst, *dst7); in WriteCol1()
108 __m128 *dst6, __m128 *dst7, __m128 *dst8, int stride, int r) { in WriteCol2() argument
126 _mm_store_ss(*dst, *dst7); in WriteCol2()
127 *dst7 = _mm_shuffle_ps(*dst7, *dst7, _MM_SHUFFLE(0, 3, 2, 1)); in WriteCol2()
128 _mm_store_ss(*dst, *dst7); in WriteCol2()
133 __m128 *dst6, __m128 *dst7, __m128 *dst8, int stride, int r) { in WriteCol2Opt() argument
151 _mm_store_ss(*dst, *dst7); in WriteCol2Opt()
152 *dst7 = _mm_shuffle_ps(*dst7, *dst7, _MM_SHUFFLE(0, 3, 2, 1)); in WriteCol2Opt()
153 _mm_store_ss(*dst + 1, *dst7); in WriteCol2Opt()
[all …]
DMatMul_Sse.c118 …__m128 dst5 = _mm_setzero_ps(), dst6 = _mm_setzero_ps(), dst7 = _mm_setzero_ps(), dst8 = _mm_setze… in MatmulFloatSse64Opt() local
131 dst7 = _mm_add_ps(dst7, tmp3), dst8 = _mm_add_ps(dst8, tmp4); in MatmulFloatSse64Opt()
136 DoBiasBlock8(bias_d, &dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8); in MatmulFloatSse64Opt()
140 ActBlock8(&dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8, act_type); in MatmulFloatSse64Opt()
150 _mm_storeu_ps(dst, dst7), _mm_storeu_ps(dst + 4, dst8); in MatmulFloatSse64Opt()
155 _mm_storeu_ps(c + 24, dst7), _mm_storeu_ps(c + 28, dst8); in MatmulFloatSse64Opt()
161 WriteCol1(&dst, &dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8, stride, 1, r); in MatmulFloatSse64Opt()
165 WriteCol2Opt(&dst, &dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8, stride, r); in MatmulFloatSse64Opt()
174 WriteCol3(&dst, &dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8, stride, 3, r); in MatmulFloatSse64Opt()
178 WriteCol4(&dst, &dst1, &dst2, &dst3, &dst4, &dst5, &dst6, &dst7, &dst8, stride, 4, r); in MatmulFloatSse64Opt()
[all …]
DTiledC4MatMulFp32.c66 __m128 dst7 = _mm_mul_ps(weight_data[0], _mm_set_ps1(MS_F32X4_GETI(src3, 0))); in TiledC4MatmulFp32() local
69 TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[j], MS_F32X4_GETI(src1, j), in TiledC4MatmulFp32()
106 … TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[0], MS_F32X4_GETI(src1, 0), in TiledC4MatmulFp32()
109 … TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[1], MS_F32X4_GETI(src1, 1), in TiledC4MatmulFp32()
112 … TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[2], MS_F32X4_GETI(src1, 2), in TiledC4MatmulFp32()
119 … TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[3], MS_F32X4_GETI(src1, 3), in TiledC4MatmulFp32()
146 … TiledC4MatmulFp32_Transfer(&dst5, &dst6, &dst7, &dst8, weight_data[j], MS_F32X4_GETI(src1, j), in TiledC4MatmulFp32()
156 _mm_storeu_ps(dst + 24, dst7); in TiledC4MatmulFp32()
/third_party/ffmpeg/libavcodec/mips/
Dhevc_mc_uniw_msa.c138 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, offset_vec; in hevc_uniwgt_copy_6w_msa() local
159 ILVRL_B2_SH(zero, src3, dst6, dst7); in hevc_uniwgt_copy_6w_msa()
162 SLLI_4V(dst4, dst5, dst6, dst7, 6); in hevc_uniwgt_copy_6w_msa()
167 HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec, in hevc_uniwgt_copy_6w_msa()
169 dst7); in hevc_uniwgt_copy_6w_msa()
171 PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3); in hevc_uniwgt_copy_6w_msa()
200 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, offset_vec; in hevc_uniwgt_copy_8w_msa() local
263 ILVRL_B2_SH(zero, src3, dst6, dst7); in hevc_uniwgt_copy_8w_msa()
265 SLLI_4V(dst4, dst5, dst6, dst7, 6); in hevc_uniwgt_copy_8w_msa()
269 HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec, in hevc_uniwgt_copy_8w_msa()
[all …]
Dvc1dsp_msa.c145 v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_vc1_inv_trans_4x8_msa() local
204 LD_SW8(dest, linesize, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_vc1_inv_trans_4x8_msa()
206 zero_m, dst4, zero_m, dst5, zero_m, dst6, zero_m, dst7, in ff_vc1_inv_trans_4x8_msa()
207 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_vc1_inv_trans_4x8_msa()
210 ILVR_H4_SW(zero_m, dst4, zero_m, dst5, zero_m, dst6, zero_m, dst7, in ff_vc1_inv_trans_4x8_msa()
211 dst4, dst5, dst6, dst7); in ff_vc1_inv_trans_4x8_msa()
222 ADD4(in_r4, dst4, in_r5, dst5, in_r6, dst6, in_r7, dst7, in ff_vc1_inv_trans_4x8_msa()
235 v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_vc1_inv_trans_8x4_msa() local
283 dst4, dst5, dst6, dst7); in ff_vc1_inv_trans_8x4_msa()
306 ADD4(in4, dst4, in5, dst5, in6, dst6, in7, dst7, in4, in5, in6, in7); in ff_vc1_inv_trans_8x4_msa()
Dhevc_idct_msa.c773 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_addblk_16x16_msa() local
778 LD_UB4(temp_dst, stride, dst4, dst5, dst6, dst7); in hevc_addblk_16x16_msa()
788 UNPCK_UB_SH(dst7, dst_r3, dst_l3); in hevc_addblk_16x16_msa()
800 LD_UB4(temp_dst, stride, dst4, dst5, dst6, dst7); in hevc_addblk_16x16_msa()
818 UNPCK_UB_SH(dst7, dst_r3, dst_l3); in hevc_addblk_16x16_msa()
840 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_addblk_32x32_msa() local
847 LD_UB2(temp_dst, 16, dst6, dst7); in hevc_addblk_32x32_msa()
857 UNPCK_UB_SH(dst7, dst_r3, dst_l3); in hevc_addblk_32x32_msa()
871 LD_UB2(temp_dst, 16, dst6, dst7); in hevc_addblk_32x32_msa()
890 UNPCK_UB_SH(dst7, dst_r3, dst_l3); in hevc_addblk_32x32_msa()
[all …]
Dh264idct_msa.c122 v16i8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in avc_idct8_addblk_msa() local
227 LD_SB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in avc_idct8_addblk_msa()
230 ILVR_B4_SH(zeros, dst4, zeros, dst5, zeros, dst6, zeros, dst7, in avc_idct8_addblk_msa()
246 v16i8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in avc_idct8_dc_addblk_msa() local
256 LD_SB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in avc_idct8_dc_addblk_msa()
259 ILVR_B4_SH(zeros, dst4, zeros, dst5, zeros, dst6, zeros, dst7, in avc_idct8_dc_addblk_msa()
Dhevc_mc_bi_msa.c143 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_bi_copy_6w_msa() local
159 ILVRL_B2_SH(zero, src3, dst6, dst7); in hevc_bi_copy_6w_msa()
161 SLLI_4V(dst4, dst5, dst6, dst7, 6); in hevc_bi_copy_6w_msa()
164 HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6, dst7, in hevc_bi_copy_6w_msa()
165 7, dst4, dst5, dst6, dst7); in hevc_bi_copy_6w_msa()
167 PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3); in hevc_bi_copy_6w_msa()
194 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_bi_copy_8w_msa() local
251 ILVRL_B2_SH(zero, src3, dst6, dst7); in hevc_bi_copy_8w_msa()
256 SLLI_4V(dst4, dst5, dst6, dst7, 6); in hevc_bi_copy_8w_msa()
260 dst7, 7, dst4, dst5, dst6, dst7); in hevc_bi_copy_8w_msa()
[all …]
Dhevc_mc_biw_msa.c444 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9, dst10; in hevc_biwgt_copy_24w_msa() local
468 ILVRL_B2_SH(zero, src4, dst6, dst7); in hevc_biwgt_copy_24w_msa()
472 SLLI_4V(dst4, dst5, dst6, dst7, 6); in hevc_biwgt_copy_24w_msa()
477 HEVC_BIW_RND_CLIP4_MAX_SATU(dst4, dst5, dst6, dst7, in8, in9, in2, in6, in hevc_biwgt_copy_24w_msa()
479 dst6, dst7); in hevc_biwgt_copy_24w_msa()
484 PCKEV_B3_UB(dst7, dst6, dst9, dst8, dst11, dst10, out3, out4, out5); in hevc_biwgt_copy_24w_msa()
2098 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8; in hevc_hv_biwgt_8t_8multx2mult_msa() local
2194 dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, in hevc_hv_biwgt_8t_8multx2mult_msa()
2197 ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l); in hevc_hv_biwgt_8t_8multx2mult_msa()
2212 ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l); in hevc_hv_biwgt_8t_8multx2mult_msa()
[all …]
Dhevcdsp_msa.c905 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_hz_8t_64w_msa() local
982 dst7 = const_vec; in hevc_hz_8t_64w_msa()
984 dst7, dst7, dst7, dst7); in hevc_hz_8t_64w_msa()
985 ST_SH(dst7, dst + 56); in hevc_hz_8t_64w_msa()
1485 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_hv_8t_8multx1mult_msa() local
1561 dst7 = const_vec; in hevc_hv_8t_8multx1mult_msa()
1563 dst7, dst7, dst7, dst7); in hevc_hv_8t_8multx1mult_msa()
1568 ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l); in hevc_hv_8t_8multx1mult_msa()
1586 dst6 = dst7; in hevc_hv_8t_8multx1mult_msa()
1617 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_hv_8t_12w_msa() local
[all …]
Dhpeldsp_msa.c504 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in common_vt_bil_and_aver_dst_16w_msa() local
517 LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in common_vt_bil_and_aver_dst_16w_msa()
520 AVER_UB4_UB(dst4, res4, dst5, res5, dst6, res6, dst7, res7, in common_vt_bil_and_aver_dst_16w_msa()
1017 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in common_hv_bil_and_aver_dst_16w_msa() local
1058 LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in common_hv_bil_and_aver_dst_16w_msa()
1073 PCKEV_AVG_ST_UB(sum7_l, sum7_r, dst7, dst); in common_hv_bil_and_aver_dst_16w_msa()
1303 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in avg_width16_msa() local
1308 LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in avg_width16_msa()
1312 AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, in avg_width16_msa()
1313 dst4, dst5, dst6, dst7); in avg_width16_msa()
[all …]
Dh264dsp_msa.c2342 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_weight_h264_pixels16_8_msa() local
2389 dst5, dst6, dst7); in ff_weight_h264_pixels16_8_msa()
2390 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, src, stride); in ff_weight_h264_pixels16_8_msa()
2428 dst5, dst6, dst7); in ff_weight_h264_pixels16_8_msa()
2429 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, src, stride); in ff_weight_h264_pixels16_8_msa()
2466 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_biweight_h264_pixels16_8_msa() local
2485 LD_UB8(dst, stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_biweight_h264_pixels16_8_msa()
2487 XORI_B8_128_UB(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_biweight_h264_pixels16_8_msa()
2492 ILVR_B4_SB(dst4, src4, dst5, src5, dst6, src6, dst7, src7, vec8, vec10, in ff_biweight_h264_pixels16_8_msa()
2494 ILVL_B4_SB(dst4, src4, dst5, src5, dst6, src6, dst7, src7, vec9, vec11, in ff_biweight_h264_pixels16_8_msa()
[all …]
Dvp9_mc_msa.c3421 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_avg_bilin_32v_msa() local
3439 LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7); in ff_avg_bilin_32v_msa()
3486 PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride); in ff_avg_bilin_32v_msa()
3502 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_avg_bilin_64v_msa() local
3522 LD_UB2(dst + 48, dst_stride, dst6, dst7); in ff_avg_bilin_64v_msa()
3571 PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride); in ff_avg_bilin_64v_msa()
4132 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in avg_width16_msa() local
4138 LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in avg_width16_msa()
4142 AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, in avg_width16_msa()
4143 dst4, dst5, dst6, dst7); in avg_width16_msa()
[all …]
Dhevc_mc_uni_msa.c1488 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8; in hevc_hv_uni_8t_8multx2mult_msa() local
1563 dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, in hevc_hv_uni_8t_8multx2mult_msa()
1566 ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l); in hevc_hv_uni_8t_8multx2mult_msa()
1579 ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l); in hevc_hv_uni_8t_8multx2mult_msa()
1599 dst5 = dst7; in hevc_hv_uni_8t_8multx2mult_msa()
1636 v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8; in hevc_hv_uni_8t_12w_msa() local
1709 dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, in hevc_hv_uni_8t_12w_msa()
1712 ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l); in hevc_hv_uni_8t_12w_msa()
1725 ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l); in hevc_hv_uni_8t_12w_msa()
1745 dst5 = dst7; in hevc_hv_uni_8t_12w_msa()
[all …]
Dh264dsp_mmi.c727 MMI_LDC1(%[ftmp4], %[dst7], 0x00) in ff_h264_idct8_dc_add_8_mmi()
751 MMI_SDC1(%[ftmp4], %[dst7], 0x00) in ff_h264_idct8_dc_add_8_mmi()
762 [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride), in ff_h264_idct8_dc_add_8_mmi()
Dvp9_idct_msa.c713 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in vp9_iadst8x8_colcol_addblk_msa() local
763 dst7 = LD_UB(dst + 7 * dst_stride); in vp9_iadst8x8_colcol_addblk_msa()
771 res7 = (v8i16) __msa_ilvr_b((v16i8) zero, (v16i8) dst7); in vp9_iadst8x8_colcol_addblk_msa()
1310 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in vp9_iadst16_1d_columns_addblk_msa() local
1421 dst7 = LD_UB(dst + 11 * dst_stride); in vp9_iadst16_1d_columns_addblk_msa()
1422 ILVR_B2_SH(zero, dst6, zero, dst7, res6, res7); in vp9_iadst16_1d_columns_addblk_msa()
Dh264qpel_msa.c676 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_avg_h264_qpel16_mc00_msa() local
680 LD_UB8(dst, stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_avg_h264_qpel16_mc00_msa()
684 AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, dst4, dst5, in ff_avg_h264_qpel16_mc00_msa()
685 dst6, dst7); in ff_avg_h264_qpel16_mc00_msa()
686 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, stride); in ff_avg_h264_qpel16_mc00_msa()
690 LD_UB8(dst, stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in ff_avg_h264_qpel16_mc00_msa()
694 AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, dst4, dst5, in ff_avg_h264_qpel16_mc00_msa()
695 dst6, dst7); in ff_avg_h264_qpel16_mc00_msa()
696 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, stride); in ff_avg_h264_qpel16_mc00_msa()
1607 v8i16 hz_out7, hz_out8, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in ff_put_h264_qpel16_mc21_msa() local
[all …]
Dhevc_lpf_sao_msa.c466 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in hevc_loopfilter_luma_ver_msa() local
865 ILVRL_B2_UB(dst3, dst2, dst6, dst7); in hevc_loopfilter_luma_ver_msa()
867 ILVRL_H2_UB(dst7, dst6, dst2, dst3); in hevc_loopfilter_luma_ver_msa()
Dqpeldsp_msa.c5838 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; in avg_width16_msa() local
5843 LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); in avg_width16_msa()
5847 AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, in avg_width16_msa()
5848 dst4, dst5, dst6, dst7); in avg_width16_msa()
5849 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride); in avg_width16_msa()
/third_party/ffmpeg/libavcodec/x86/
Dcavsidct.asm104 SUMSUB_BA w, 7, 6 ; m7 = dst0, m6 = dst7
/third_party/ffmpeg/libavcodec/aarch64/
Dvp9mc_16bpp_neon.S123 .macro extmlal dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, src1, src2, src3, src4, src5, src6, …
138 smlal \dst7\().4s, v23.4h, v0.h[\offset]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dmatmul_fp32.c304 __m128 dst7 = _mm_movelh_ps(src56H, src78H); in RowMajor2Col12Major() local
328 _mm_storeu_ps(dst_c + 28, dst7); in RowMajor2Col12Major()