/external/libavc/encoder/x86/ |
D | ime_distortion_metrics_sse42.c | 116 src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); in ime_compute_sad_16x16_sse42() 117 src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); in ime_compute_sad_16x16_sse42() 118 src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); in ime_compute_sad_16x16_sse42() 119 src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); in ime_compute_sad_16x16_sse42() 121 est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); in ime_compute_sad_16x16_sse42() 122 est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); in ime_compute_sad_16x16_sse42() 123 est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); in ime_compute_sad_16x16_sse42() 124 est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); in ime_compute_sad_16x16_sse42() 139 src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); in ime_compute_sad_16x16_sse42() 140 src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); in ime_compute_sad_16x16_sse42() [all …]
|
D | ih264e_half_pel_ssse3.c | 132 …src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 … in ih264e_sixtapfilter_horz_ssse3() 133 …src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 … in ih264e_sixtapfilter_horz_ssse3() 283 src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 287 src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 291 src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 295 src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 299 src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 305 src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); in ih264e_sixtap_filter_2dvh_vert_ssse3() 393 src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1)); in ih264e_sixtap_filter_2dvh_vert_ssse3() 394 src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 1)); in ih264e_sixtap_filter_2dvh_vert_ssse3() [all …]
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_convolve_2d_sse2.c | 20 s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); in copy_64() 21 s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); in copy_64() 22 s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); in copy_64() 23 s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); in copy_64() 24 s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8)); in copy_64() 25 s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8)); in copy_64() 26 s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8)); in copy_64() 27 s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8)); in copy_64() 40 s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); in copy_128() 41 s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); in copy_128() [all …]
|
D | cfl_sse2.c | 44 l0 = _mm_add_epi16(_mm_loadu_si128(src), in subtract_average_sse2() 45 _mm_loadu_si128(src + CFL_BUF_LINE_I128)); in subtract_average_sse2() 47 l0 = _mm_add_epi16(_mm_loadu_si128(src), _mm_loadu_si128(src + 1)); in subtract_average_sse2() 52 l0 = _mm_add_epi16(_mm_loadu_si128(src + 2), _mm_loadu_si128(src + 3)); in subtract_average_sse2() 72 _mm_storeu_si128(dst, _mm_sub_epi16(_mm_loadu_si128(src), avg_epi16)); in subtract_average_sse2() 75 _mm_sub_epi16(_mm_loadu_si128(src + 1), avg_epi16)); in subtract_average_sse2() 78 _mm_sub_epi16(_mm_loadu_si128(src + 2), avg_epi16)); in subtract_average_sse2() 80 _mm_sub_epi16(_mm_loadu_si128(src + 3), avg_epi16)); in subtract_average_sse2()
|
D | cfl_ssse3.c | 64 __m128i top = _mm_loadu_si128((__m128i *)input); in cfl_luma_subsampling_420_lbd_ssse3() 66 __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride)); in cfl_luma_subsampling_420_lbd_ssse3() 71 __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); in cfl_luma_subsampling_420_lbd_ssse3() 73 _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1); in cfl_luma_subsampling_420_lbd_ssse3() 112 __m128i top = _mm_loadu_si128((__m128i *)input); in cfl_luma_subsampling_422_lbd_ssse3() 116 __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); in cfl_luma_subsampling_422_lbd_ssse3() 153 __m128i row = _mm_loadu_si128((__m128i *)input); in cfl_luma_subsampling_444_lbd_ssse3() 159 __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1); in cfl_luma_subsampling_444_lbd_ssse3() 196 const __m128i top = _mm_loadu_si128((__m128i *)input); in cfl_luma_subsampling_420_hbd_ssse3() 197 const __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride)); in cfl_luma_subsampling_420_hbd_ssse3() [all …]
|
D | convolve_2d_sse2.c | 49 const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter); in av1_convolve_2d_sr_sse2() 72 _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); in av1_convolve_2d_sr_sse2() 115 const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter); in av1_convolve_2d_sr_sse2() 221 s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); in copy_128() 222 s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); in copy_128() 223 s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16)); in copy_128() 224 s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16)); in copy_128() 225 s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 16)); in copy_128() 226 s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 16)); in copy_128() 227 s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 16)); in copy_128() [all …]
|
/external/webrtc/modules/desktop_capture/ |
D | differ_vector_sse2.cc | 30 v0 = _mm_loadu_si128(i1); in VectorDifference_SSE2_W16() 31 v1 = _mm_loadu_si128(i2); in VectorDifference_SSE2_W16() 34 v0 = _mm_loadu_si128(i1 + 1); in VectorDifference_SSE2_W16() 35 v1 = _mm_loadu_si128(i2 + 1); in VectorDifference_SSE2_W16() 38 v0 = _mm_loadu_si128(i1 + 2); in VectorDifference_SSE2_W16() 39 v1 = _mm_loadu_si128(i2 + 2); in VectorDifference_SSE2_W16() 42 v0 = _mm_loadu_si128(i1 + 3); in VectorDifference_SSE2_W16() 43 v1 = _mm_loadu_si128(i2 + 3); in VectorDifference_SSE2_W16() 62 v0 = _mm_loadu_si128(i1); in VectorDifference_SSE2_W32() 63 v1 = _mm_loadu_si128(i2); in VectorDifference_SSE2_W32() [all …]
|
/external/libhevc/common/x86/ |
D | ihevc_32x32_itrans_recon_sse42_intr.c | 251 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 253 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 255 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 257 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 259 m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 261 m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 263 m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 265 m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 268 m_temp_reg_80 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() 270 m_temp_reg_81 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_sse42() [all …]
|
D | ihevc_16x16_itrans_recon_sse42_intr.c | 204 m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 206 m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 208 m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 210 m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 212 m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 214 m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 216 m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 218 m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_sse42() 243 m_coeff1 = _mm_loadu_si128((__m128i *)&g_ai2_ihevc_trans_16_even[2][0]); //89 75 in ihevc_itrans_recon_16x16_sse42() 289 … m_coeff3 = _mm_loadu_si128((__m128i *)&g_ai2_ihevc_trans_16_even[3][0]); //75 -18 in ihevc_itrans_recon_16x16_sse42() [all …]
|
D | ihevc_intra_pred_filters_ssse3_intr.c | 418 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_ssse3() 427 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_ssse3() 428 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_ssse3() 439 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_ssse3() 440 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_ssse3() 441 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); in ihevc_intra_pred_ref_filtering_ssse3() 442 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); in ihevc_intra_pred_ref_filtering_ssse3() 454 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_ssse3() 455 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_ssse3() 456 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); in ihevc_intra_pred_ref_filtering_ssse3() [all …]
|
D | ihevc_weighted_pred_sse42_intr.c | 152 src_temp0_4x32b = _mm_loadu_si128((__m128i *)(pi2_src)); in ihevc_weighted_pred_uni_sse42() 154 src_temp1_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd)); in ihevc_weighted_pred_uni_sse42() 156 src_temp2_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 2 * src_strd)); in ihevc_weighted_pred_uni_sse42() 158 src_temp3_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 3 * src_strd)); in ihevc_weighted_pred_uni_sse42() 161 src_temp4_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 4)); in ihevc_weighted_pred_uni_sse42() 163 src_temp5_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd + 4)); in ihevc_weighted_pred_uni_sse42() 165 src_temp6_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 2 * src_strd + 4)); in ihevc_weighted_pred_uni_sse42() 167 src_temp7_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + 3 * src_strd + 4)); in ihevc_weighted_pred_uni_sse42() 277 src_temp0_4x32b = _mm_loadu_si128((__m128i *)(pi2_src)); in ihevc_weighted_pred_uni_sse42() 279 src_temp1_4x32b = _mm_loadu_si128((__m128i *)(pi2_src + src_strd)); in ihevc_weighted_pred_uni_sse42() [all …]
|
D | ihevc_chroma_intra_pred_filters_ssse3_intr.c | 216 src_temp_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + 2 * (two_nt + 1) + col)); in ihevc_intra_pred_chroma_planar_ssse3() 331 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); in ihevc_intra_pred_chroma_dc_ssse3() 332 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16)); in ihevc_intra_pred_chroma_dc_ssse3() 333 src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 32)); in ihevc_intra_pred_chroma_dc_ssse3() 334 src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 48)); in ihevc_intra_pred_chroma_dc_ssse3() 375 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); in ihevc_intra_pred_chroma_dc_ssse3() 376 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt) + 16)); in ihevc_intra_pred_chroma_dc_ssse3() 406 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + (2 * nt))); in ihevc_intra_pred_chroma_dc_ssse3() 692 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0)); in ihevc_intra_pred_chroma_ver_ssse3() 709 temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + (4 * nt) + 2 + 0)); in ihevc_intra_pred_chroma_ver_ssse3() [all …]
|
D | ihevc_intra_pred_filters_sse42_intr.c | 135 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_sse42() 144 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_sse42() 145 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_sse42() 156 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_sse42() 157 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_sse42() 158 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); in ihevc_intra_pred_ref_filtering_sse42() 159 src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); in ihevc_intra_pred_ref_filtering_sse42() 171 src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); in ihevc_intra_pred_ref_filtering_sse42() 172 src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); in ihevc_intra_pred_ref_filtering_sse42() 173 src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); in ihevc_intra_pred_ref_filtering_sse42() [all …]
|
/external/XNNPACK/src/u8-maxpool/ |
D | 9p8x-minmax-sse2-c16.c | 82 const __m128i vi0 = _mm_loadu_si128((const __m128i*) i0); i0 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 83 const __m128i vi1 = _mm_loadu_si128((const __m128i*) i1); i1 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 84 const __m128i vi2 = _mm_loadu_si128((const __m128i*) i2); i2 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 85 const __m128i vi3 = _mm_loadu_si128((const __m128i*) i3); i3 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 86 const __m128i vi4 = _mm_loadu_si128((const __m128i*) i4); i4 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 87 const __m128i vi5 = _mm_loadu_si128((const __m128i*) i5); i5 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 88 const __m128i vi6 = _mm_loadu_si128((const __m128i*) i6); i6 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 89 const __m128i vi7 = _mm_loadu_si128((const __m128i*) i7); i7 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 90 const __m128i vi8 = _mm_loadu_si128((const __m128i*) i8); i8 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 105 const __m128i vi0 = _mm_loadu_si128((const __m128i*) i0); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() [all …]
|
/external/webp/src/dsp/ |
D | lossless_enc_sse2.c | 33 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb in SubtractGreenFromBlueAndRed_SSE2() 61 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb in TransformColor_SSE2() 96 const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); in CollectColorBlueTransforms_SSE2() 97 const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); in CollectColorBlueTransforms_SSE2() 142 const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); in CollectColorRedTransforms_SSE2() 143 const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); in CollectColorRedTransforms_SSE2() 180 const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]); in AddVector_SSE2() 181 const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]); in AddVector_SSE2() 183 const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]); in AddVector_SSE2() 184 const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]); in AddVector_SSE2() [all …]
|
D | enc_sse41.c | 41 const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]); in CollectHistogram_SSE41() 42 const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]); in CollectHistogram_SSE41() 80 const __m128i inA_0 = _mm_loadu_si128((const __m128i*)&inA[BPS * 0]); in TTransform_SSE41() 81 const __m128i inA_1 = _mm_loadu_si128((const __m128i*)&inA[BPS * 1]); in TTransform_SSE41() 82 const __m128i inA_2 = _mm_loadu_si128((const __m128i*)&inA[BPS * 2]); in TTransform_SSE41() 88 const __m128i inB_0 = _mm_loadu_si128((const __m128i*)&inB[BPS * 0]); in TTransform_SSE41() 89 const __m128i inB_1 = _mm_loadu_si128((const __m128i*)&inB[BPS * 1]); in TTransform_SSE41() 90 const __m128i inB_2 = _mm_loadu_si128((const __m128i*)&inB[BPS * 2]); in TTransform_SSE41() 132 const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]); in TTransform_SSE41() 133 const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]); in TTransform_SSE41() [all …]
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up32x9-minmax-avx512skx-mul32.c | 99 … const __m512i vi0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i0)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 101 …const __m512i vi0xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (i0 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 108 … const __m512i vi1x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i1)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 110 …const __m512i vi1xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (i1 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 117 … const __m512i vi2x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i2)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 119 …const __m512i vi2xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (i2 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 126 … const __m512i vi3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i3)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 128 …const __m512i vi3xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (i3 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 135 … const __m512i vi4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i4)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 137 …const __m512i vi4xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (i4 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() [all …]
|
D | up32x9-minmax-avx2-mul16.c | 89 … const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 90 …const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 91 …const __m256i vi0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i0 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 92 …const __m256i vk0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 104 … const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 105 …const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 106 …const __m256i vi1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i1 + 1… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 107 …const __m256i vk1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 119 … const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 120 …const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() [all …]
|
D | up16x9-minmax-avx512skx-mul32.c | 97 … const __m512i vi0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 103 … const __m512i vi1x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 109 … const __m512i vi2x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i2)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 115 … const __m512i vi3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i3)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 121 … const __m512i vi4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i4)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 127 … const __m512i vi5x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i5)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 133 … const __m512i vi6x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i6)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 139 … const __m512i vi7x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i7)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 145 … const __m512i vi8x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 189 … const __m512i vi0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) i0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() [all …]
|
D | up16x9-minmax-avx2-mul16.c | 87 … const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 88 …const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 96 … const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 97 …const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 105 … const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 106 …const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 114 … const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 115 …const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 123 … const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 124 …const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() [all …]
|
/external/flac/src/libFLAC/ |
D | lpc_intrin_sse2.c | 83 summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 84 …mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(sum… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 85 …mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 86 …mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 87 …mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 88 …mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 89 …mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 90 …mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 91 …mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 92 …mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() [all …]
|
D | lpc_intrin_sse41.c | 961 qlp[0] = _mm_loadu_si128((const __m128i*)(qlp_coeff + 0)); // q[3] q[2] q[1] q[0] in FLAC__lpc_restore_signal_intrin_sse41() 962 qlp[1] = _mm_loadu_si128((const __m128i*)(qlp_coeff + 4)); // q[7] q[6] q[5] q[4] in FLAC__lpc_restore_signal_intrin_sse41() 963 qlp[2] = _mm_loadu_si128((const __m128i*)(qlp_coeff + 8)); // q[11] q[10] q[9] q[8] in FLAC__lpc_restore_signal_intrin_sse41() 974 …dat[2] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(data - 12)), _MM_SHUFFLE(0, 1, 2, 3));… in FLAC__lpc_restore_signal_intrin_sse41() 975 …dat[1] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(data - 8)), _MM_SHUFFLE(0, 1, 2, 3)); … in FLAC__lpc_restore_signal_intrin_sse41() 976 …dat[0] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(data - 4)), _MM_SHUFFLE(0, 1, 2, 3)); … in FLAC__lpc_restore_signal_intrin_sse41() 1003 qlp[0] = _mm_loadu_si128((const __m128i*)(qlp_coeff + 0)); in FLAC__lpc_restore_signal_intrin_sse41() 1004 qlp[1] = _mm_loadu_si128((const __m128i*)(qlp_coeff + 4)); in FLAC__lpc_restore_signal_intrin_sse41() 1006 dat[1] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(data - 8)), _MM_SHUFFLE(0, 1, 2, 3)); in FLAC__lpc_restore_signal_intrin_sse41() 1007 dat[0] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(data - 4)), _MM_SHUFFLE(0, 1, 2, 3)); in FLAC__lpc_restore_signal_intrin_sse41() [all …]
|
/external/libmpeg2/common/x86/ |
D | impeg2_inter_pred_sse42_intr.c | 84 src_r0 = _mm_loadu_si128((__m128i *) (src)); in impeg2_copy_mb_sse42() 85 src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); in impeg2_copy_mb_sse42() 86 src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); in impeg2_copy_mb_sse42() 87 src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); in impeg2_copy_mb_sse42() 97 src_r0 = _mm_loadu_si128((__m128i *) (src)); in impeg2_copy_mb_sse42() 98 src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); in impeg2_copy_mb_sse42() 99 src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); in impeg2_copy_mb_sse42() 100 src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); in impeg2_copy_mb_sse42() 110 src_r0 = _mm_loadu_si128((__m128i *) (src)); in impeg2_copy_mb_sse42() 111 src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); in impeg2_copy_mb_sse42() [all …]
|
/external/skqp/src/opts/ |
D | SkBlitRow_opts.h | 90 auto s0 = _mm_loadu_si128((const __m128i*)(src) + 0), in blit_row_s32a_opaque() 91 s1 = _mm_loadu_si128((const __m128i*)(src) + 1), in blit_row_s32a_opaque() 92 s2 = _mm_loadu_si128((const __m128i*)(src) + 2), in blit_row_s32a_opaque() 93 s3 = _mm_loadu_si128((const __m128i*)(src) + 3); in blit_row_s32a_opaque() 126 _mm_storeu_si128(d0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(d0))); in blit_row_s32a_opaque() 127 _mm_storeu_si128(d1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(d1))); in blit_row_s32a_opaque() 128 _mm_storeu_si128(d2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(d2))); in blit_row_s32a_opaque() 129 _mm_storeu_si128(d3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(d3))); in blit_row_s32a_opaque() 138 auto s0 = _mm_loadu_si128((const __m128i*)(src) + 0), in blit_row_s32a_opaque() 139 s1 = _mm_loadu_si128((const __m128i*)(src) + 1), in blit_row_s32a_opaque() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | highbd_subtract_sse2.c | 115 u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride)); in subtract_8x4() 116 u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride)); in subtract_8x4() 117 u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride)); in subtract_8x4() 118 u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride)); in subtract_8x4() 120 v0 = _mm_loadu_si128((__m128i const *)(pred + 0 * pred_stride)); in subtract_8x4() 121 v1 = _mm_loadu_si128((__m128i const *)(pred + 1 * pred_stride)); in subtract_8x4() 122 v2 = _mm_loadu_si128((__m128i const *)(pred + 2 * pred_stride)); in subtract_8x4() 123 v3 = _mm_loadu_si128((__m128i const *)(pred + 3 * pred_stride)); in subtract_8x4() 143 u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride)); in subtract_8x8() 144 u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride)); in subtract_8x8() [all …]
|