/external/libhevc/common/x86/ |
D | ihevc_weighted_pred_sse42_intr.c | 176 src_temp0_4x32b = _mm_add_epi32(src_temp0_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 177 src_temp1_4x32b = _mm_add_epi32(src_temp1_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 178 src_temp2_4x32b = _mm_add_epi32(src_temp2_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 179 src_temp3_4x32b = _mm_add_epi32(src_temp3_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 194 src_temp4_4x32b = _mm_add_epi32(src_temp4_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 195 src_temp5_4x32b = _mm_add_epi32(src_temp5_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 196 src_temp6_4x32b = _mm_add_epi32(src_temp6_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 197 src_temp7_4x32b = _mm_add_epi32(src_temp7_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42() 206 src_temp0_4x32b = _mm_add_epi32(src_temp0_4x32b, const_temp_4x32b); in ihevc_weighted_pred_uni_sse42() 207 src_temp1_4x32b = _mm_add_epi32(src_temp1_4x32b, const_temp_4x32b); in ihevc_weighted_pred_uni_sse42() [all …]
|
D | ihevc_32x32_itrans_recon_sse42_intr.c | 362 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 376 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 389 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 403 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 417 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 431 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 448 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 462 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 477 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() 490 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42() [all …]
|
D | ihevc_itrans_recon_32x32_ssse3_intr.c | 373 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 387 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 400 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 414 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 428 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 442 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 459 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 473 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 488 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() 501 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3() [all …]
|
D | ihevc_itrans_recon_ssse3_intr.c | 194 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_21); in ihevc_itrans_recon_4x4_ttype1_ssse3() 195 m_temp_reg_13 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3() 201 m_temp_reg_10 = _mm_add_epi32(m_temp_reg_0, m_temp_reg_2); in ihevc_itrans_recon_4x4_ttype1_ssse3() 206 m_temp_reg_11 = _mm_add_epi32(m_temp_reg_2, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_ssse3() 217 m_temp_reg_14 = _mm_add_epi32(m_temp_reg_14, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_ssse3() 265 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_21); in ihevc_itrans_recon_4x4_ttype1_ssse3() 266 m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3() 272 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_30, m_temp_reg_31); in ihevc_itrans_recon_4x4_ttype1_ssse3() 273 m_temp_reg_4 = _mm_add_epi32(m_rdng_factor, m_temp_reg_13); in ihevc_itrans_recon_4x4_ttype1_ssse3() 274 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_ssse3() [all …]
|
D | ihevc_itrans_recon_sse42_intr.c | 182 m_temp_reg_10 = _mm_add_epi32(m_temp_reg_0, m_temp_reg_2); in ihevc_itrans_recon_4x4_ttype1_sse42() 187 m_temp_reg_11 = _mm_add_epi32(m_temp_reg_2, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_sse42() 198 m_temp_reg_14 = _mm_add_epi32(m_temp_reg_14, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_sse42() 223 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_30, m_temp_reg_31); in ihevc_itrans_recon_4x4_ttype1_sse42() 224 m_temp_reg_4 = _mm_add_epi32(m_rdng_factor, m_temp_reg_13); in ihevc_itrans_recon_4x4_ttype1_sse42() 225 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42() 228 m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42() 230 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_34, m_temp_reg_35); in ihevc_itrans_recon_4x4_ttype1_sse42() 232 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42() 234 m_temp_reg_22 = _mm_add_epi32(m_temp_reg_36, m_rdng_factor); in ihevc_itrans_recon_4x4_ttype1_sse42() [all …]
|
D | ihevc_16x16_itrans_recon_sse42_intr.c | 262 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42() 281 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42() 300 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42() 317 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42() 335 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42() 351 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42() 369 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42() 384 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42() 445 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_20); in ihevc_itrans_recon_16x16_sse42() 450 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42() [all …]
|
D | ihevc_itrans_recon_16x16_ssse3_intr.c | 268 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3() 287 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3() 306 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3() 323 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3() 341 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3() 357 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3() 375 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3() 390 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3() 449 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_20); in ihevc_itrans_recon_16x16_ssse3() 454 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3() [all …]
|
/external/libaom/libaom/av1/encoder/x86/ |
D | av1_fwd_txfm1d_sse4.c | 24 buf1[0] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 28 buf1[1] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 32 buf1[2] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 36 buf1[3] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 40 buf1[4] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 44 buf1[5] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 48 buf1[6] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 52 buf1[7] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 56 buf1[8] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() 60 buf1[9] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1() [all …]
|
D | highbd_fwd_txfm_sse4.c | 74 s0 = _mm_add_epi32(in[0], in[endidx]); in fdct4x4_sse4_1() 77 s1 = _mm_add_epi32(in[num_col], in[endidx]); in fdct4x4_sse4_1() 83 u2 = _mm_add_epi32(u0, u1); in fdct4x4_sse4_1() 86 u3 = _mm_add_epi32(u2, rnding); in fdct4x4_sse4_1() 87 v1 = _mm_add_epi32(v0, rnding); in fdct4x4_sse4_1() 95 v2 = _mm_add_epi32(v0, v1); in fdct4x4_sse4_1() 97 v3 = _mm_add_epi32(v2, rnding); in fdct4x4_sse4_1() 104 v3 = _mm_add_epi32(v2, rnding); in fdct4x4_sse4_1() 145 t = _mm_add_epi32(in[idx], in[idx + num_col]); in fadst4x4_sse4_1() 156 t = _mm_add_epi32(s0, s2); in fadst4x4_sse4_1() [all …]
|
/external/libmpeg2/common/x86/ |
D | impeg2_idct_recon_sse42_intr.c | 263 m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); in impeg2_idct_recon_sse42() 266 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); in impeg2_idct_recon_sse42() 294 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in impeg2_idct_recon_sse42() 297 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42() 298 m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); in impeg2_idct_recon_sse42() 324 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); in impeg2_idct_recon_sse42() 327 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42() 328 m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); in impeg2_idct_recon_sse42() 354 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); in impeg2_idct_recon_sse42() 357 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42() [all …]
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_dct_intrin_sse2.c | 94 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); in fdct4_sse2() 95 v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); in fdct4_sse2() 96 v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); in fdct4_sse2() 97 v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); in fdct4_sse2() 133 u[0] = _mm_add_epi32(v[0], v[1]); in fadst4_sse2() 135 u[2] = _mm_add_epi32(v[3], v[4]); in fadst4_sse2() 139 u[6] = _mm_add_epi32(u[3], u[5]); in fadst4_sse2() 141 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); in fadst4_sse2() 142 v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); in fadst4_sse2() 143 v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); in fadst4_sse2() [all …]
|
/external/libopus/celt/x86/ |
D | pitch_sse4_1.c | 70 acc1 = _mm_add_epi32(acc1, inVec1_76543210); in celt_inner_prod_sse4_1() 71 acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); in celt_inner_prod_sse4_1() 74 acc1 = _mm_add_epi32(acc1, acc2); in celt_inner_prod_sse4_1() 83 acc1 = _mm_add_epi32(acc1, inVec1_76543210); in celt_inner_prod_sse4_1() 94 acc1 = _mm_add_epi32(acc1, inVec1_3210); in celt_inner_prod_sse4_1() 98 acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); in celt_inner_prod_sse4_1() 99 acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); in celt_inner_prod_sse4_1() 135 sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); in xcorr_kernel_sse4_1() 136 sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); in xcorr_kernel_sse4_1() 137 sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); in xcorr_kernel_sse4_1() [all …]
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 55 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 56 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 57 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 58 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 59 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 60 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 61 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 62 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 64 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 65 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() [all …]
|
D | minmax-sse41-mul32-ld32-x24.c | 51 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 52 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 53 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 54 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 55 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 56 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 58 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 59 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 60 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vy89AB, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 61 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vyCDEF, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() [all …]
|
D | minmax-sse41-mul16-ld64-x32.c | 84 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 85 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 86 …__m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 87 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 88 …__m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 89 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 90 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 91 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 93 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 94 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_idct32x32_add_sse4.c | 36 step1[8] = _mm_add_epi32(step2[8], step2[11]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 37 step1[9] = _mm_add_epi32(step2[9], step2[10]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 42 step1[14] = _mm_add_epi32(step2[14], step2[13]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 43 step1[15] = _mm_add_epi32(step2[15], step2[12]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 61 step2[16] = _mm_add_epi32(step1[16], step1[19]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 62 step2[17] = _mm_add_epi32(step1[17], step1[18]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 67 step2[22] = _mm_add_epi32(step1[22], step1[21]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 68 step2[23] = _mm_add_epi32(step1[23], step1[20]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 70 step2[24] = _mm_add_epi32(step1[24], step1[27]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 71 step2[25] = _mm_add_epi32(step1[25], step1[26]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() [all …]
|
D | fwd_dct32x32_impl_sse2.h | 422 const __m128i s2_20_4 = _mm_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 423 const __m128i s2_20_5 = _mm_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 424 const __m128i s2_21_4 = _mm_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 425 const __m128i s2_21_5 = _mm_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 426 const __m128i s2_22_4 = _mm_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 427 const __m128i s2_22_5 = _mm_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 428 const __m128i s2_23_4 = _mm_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 429 const __m128i s2_23_5 = _mm_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 430 const __m128i s2_24_4 = _mm_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() 431 const __m128i s2_24_5 = _mm_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D() [all …]
|
D | highbd_idct32x32_add_sse2.c | 32 step1[8] = _mm_add_epi32(step2[8], step2[11]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 33 step1[9] = _mm_add_epi32(step2[9], step2[10]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 38 step1[14] = _mm_add_epi32(step2[14], step2[13]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 39 step1[15] = _mm_add_epi32(step2[15], step2[12]); in highbd_idct32_4x32_quarter_2_stage_4_to_6() 57 step2[16] = _mm_add_epi32(step1[16], step1[19]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 58 step2[17] = _mm_add_epi32(step1[17], step1[18]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 63 step2[22] = _mm_add_epi32(step1[21], step1[22]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 64 step2[23] = _mm_add_epi32(step1[20], step1[23]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 66 step2[24] = _mm_add_epi32(step1[27], step1[24]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() 67 step2[25] = _mm_add_epi32(step1[26], step1[25]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7() [all …]
|
/external/flac/src/libFLAC/ |
D | lpc_intrin_sse2.c | 84 …mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 85 …mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 86 …mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 87 …mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 88 …mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 89 …mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 90 …mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 91 …mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 92 …mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() 93 …mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2() [all …]
|
/external/libavc/common/x86/ |
D | ih264_iquant_itrans_recon_sse42.c | 160 temp4 = _mm_add_epi32(temp4, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42() 161 temp5 = _mm_add_epi32(temp5, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42() 162 temp6 = _mm_add_epi32(temp6, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42() 163 temp7 = _mm_add_epi32(temp7, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42() 194 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_sse42() 202 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_sse42() 205 resq_r0 = _mm_add_epi32(temp0, temp3); in ih264_iquant_itrans_recon_4x4_sse42() 207 resq_r1 = _mm_add_epi32(temp1, temp2); in ih264_iquant_itrans_recon_4x4_sse42() 246 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_sse42() 254 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_sse42() [all …]
|
D | ih264_iquant_itrans_recon_ssse3.c | 161 temp4 = _mm_add_epi32(temp4, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3() 162 temp5 = _mm_add_epi32(temp5, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3() 163 temp6 = _mm_add_epi32(temp6, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3() 164 temp7 = _mm_add_epi32(temp7, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3() 201 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_ssse3() 209 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_ssse3() 212 resq_r0 = _mm_add_epi32(temp0, temp3); in ih264_iquant_itrans_recon_4x4_ssse3() 214 resq_r1 = _mm_add_epi32(temp1, temp2); in ih264_iquant_itrans_recon_4x4_ssse3() 260 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_ssse3() 268 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_ssse3() [all …]
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-sse41-mul16.c | 113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [all …]
|
D | up16x9-minmax-sse41-mul16.c | 105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x4c2-xw-minmax-sse41.c | 85 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 87 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 89 vacc2x0123 = _mm_add_epi32(vacc2x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 91 vacc3x0123 = _mm_add_epi32(vacc3x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 95 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 97 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 99 vacc2x0123 = _mm_add_epi32(vacc2x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 101 vacc3x0123 = _mm_add_epi32(vacc3x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 105 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() 107 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41() [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 35 …vzero_point_product = _mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 48 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 49 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 50 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 51 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 52 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 53 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 54 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 55 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 57 …const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() [all …]
|