Home
last modified time | relevance | path

Searched refs:_mm_add_epi32 (Results 1 – 25 of 453) sorted by relevance

12345678910>>...19

/external/libhevc/common/x86/
Dihevc_weighted_pred_sse42_intr.c176 src_temp0_4x32b = _mm_add_epi32(src_temp0_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
177 src_temp1_4x32b = _mm_add_epi32(src_temp1_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
178 src_temp2_4x32b = _mm_add_epi32(src_temp2_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
179 src_temp3_4x32b = _mm_add_epi32(src_temp3_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
194 src_temp4_4x32b = _mm_add_epi32(src_temp4_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
195 src_temp5_4x32b = _mm_add_epi32(src_temp5_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
196 src_temp6_4x32b = _mm_add_epi32(src_temp6_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
197 src_temp7_4x32b = _mm_add_epi32(src_temp7_4x32b, lvl_shift_4x32b); in ihevc_weighted_pred_uni_sse42()
206 src_temp0_4x32b = _mm_add_epi32(src_temp0_4x32b, const_temp_4x32b); in ihevc_weighted_pred_uni_sse42()
207 src_temp1_4x32b = _mm_add_epi32(src_temp1_4x32b, const_temp_4x32b); in ihevc_weighted_pred_uni_sse42()
[all …]
Dihevc_32x32_itrans_recon_sse42_intr.c362 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
376 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
389 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
403 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
417 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
431 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
448 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
462 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
477 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
490 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_sse42()
[all …]
Dihevc_itrans_recon_32x32_ssse3_intr.c373 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
387 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
400 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
414 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
428 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
442 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
459 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
473 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
488 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
501 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_30); in ihevc_itrans_recon_32x32_ssse3()
[all …]
Dihevc_itrans_recon_ssse3_intr.c194 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_21); in ihevc_itrans_recon_4x4_ttype1_ssse3()
195 m_temp_reg_13 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3()
201 m_temp_reg_10 = _mm_add_epi32(m_temp_reg_0, m_temp_reg_2); in ihevc_itrans_recon_4x4_ttype1_ssse3()
206 m_temp_reg_11 = _mm_add_epi32(m_temp_reg_2, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_ssse3()
217 m_temp_reg_14 = _mm_add_epi32(m_temp_reg_14, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_ssse3()
265 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_21); in ihevc_itrans_recon_4x4_ttype1_ssse3()
266 m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3()
272 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_30, m_temp_reg_31); in ihevc_itrans_recon_4x4_ttype1_ssse3()
273 m_temp_reg_4 = _mm_add_epi32(m_rdng_factor, m_temp_reg_13); in ihevc_itrans_recon_4x4_ttype1_ssse3()
274 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_ssse3()
[all …]
Dihevc_itrans_recon_sse42_intr.c182 m_temp_reg_10 = _mm_add_epi32(m_temp_reg_0, m_temp_reg_2); in ihevc_itrans_recon_4x4_ttype1_sse42()
187 m_temp_reg_11 = _mm_add_epi32(m_temp_reg_2, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_sse42()
198 m_temp_reg_14 = _mm_add_epi32(m_temp_reg_14, m_temp_reg_3); in ihevc_itrans_recon_4x4_ttype1_sse42()
223 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_30, m_temp_reg_31); in ihevc_itrans_recon_4x4_ttype1_sse42()
224 m_temp_reg_4 = _mm_add_epi32(m_rdng_factor, m_temp_reg_13); in ihevc_itrans_recon_4x4_ttype1_sse42()
225 m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42()
228 m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42()
230 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_34, m_temp_reg_35); in ihevc_itrans_recon_4x4_ttype1_sse42()
232 m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_4); in ihevc_itrans_recon_4x4_ttype1_sse42()
234 m_temp_reg_22 = _mm_add_epi32(m_temp_reg_36, m_rdng_factor); in ihevc_itrans_recon_4x4_ttype1_sse42()
[all …]
Dihevc_16x16_itrans_recon_sse42_intr.c262 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42()
281 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42()
300 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42()
317 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42()
335 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42()
351 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42()
369 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42()
384 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_sse42()
445 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_20); in ihevc_itrans_recon_16x16_sse42()
450 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_16x16_sse42()
[all …]
Dihevc_itrans_recon_16x16_ssse3_intr.c268 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3()
287 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3()
306 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3()
323 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3()
341 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_26, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3()
357 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_27, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3()
375 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3()
390 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_25, m_temp_reg_31); in ihevc_itrans_recon_16x16_ssse3()
449 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_24, m_temp_reg_20); in ihevc_itrans_recon_16x16_ssse3()
454 m_temp_reg_34 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in ihevc_itrans_recon_16x16_ssse3()
[all …]
/external/libaom/libaom/av1/encoder/x86/
Dav1_fwd_txfm1d_sse4.c24 buf1[0] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
28 buf1[1] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
32 buf1[2] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
36 buf1[3] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
40 buf1[4] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
44 buf1[5] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
48 buf1[6] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
52 buf1[7] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
56 buf1[8] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
60 buf1[9] = _mm_add_epi32(input[startidx], input[endidx]); in av1_fdct32_sse4_1()
[all …]
Dhighbd_fwd_txfm_sse4.c74 s0 = _mm_add_epi32(in[0], in[endidx]); in fdct4x4_sse4_1()
77 s1 = _mm_add_epi32(in[num_col], in[endidx]); in fdct4x4_sse4_1()
83 u2 = _mm_add_epi32(u0, u1); in fdct4x4_sse4_1()
86 u3 = _mm_add_epi32(u2, rnding); in fdct4x4_sse4_1()
87 v1 = _mm_add_epi32(v0, rnding); in fdct4x4_sse4_1()
95 v2 = _mm_add_epi32(v0, v1); in fdct4x4_sse4_1()
97 v3 = _mm_add_epi32(v2, rnding); in fdct4x4_sse4_1()
104 v3 = _mm_add_epi32(v2, rnding); in fdct4x4_sse4_1()
145 t = _mm_add_epi32(in[idx], in[idx + num_col]); in fadst4x4_sse4_1()
156 t = _mm_add_epi32(s0, s2); in fadst4x4_sse4_1()
[all …]
/external/libmpeg2/common/x86/
Dimpeg2_idct_recon_sse42_intr.c263 m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); in impeg2_idct_recon_sse42()
266 m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); in impeg2_idct_recon_sse42()
294 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); in impeg2_idct_recon_sse42()
297 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42()
298 m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); in impeg2_idct_recon_sse42()
324 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); in impeg2_idct_recon_sse42()
327 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42()
328 m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); in impeg2_idct_recon_sse42()
354 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); in impeg2_idct_recon_sse42()
357 m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); in impeg2_idct_recon_sse42()
[all …]
/external/libvpx/libvpx/vp9/encoder/x86/
Dvp9_dct_intrin_sse2.c94 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); in fdct4_sse2()
95 v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); in fdct4_sse2()
96 v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); in fdct4_sse2()
97 v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); in fdct4_sse2()
133 u[0] = _mm_add_epi32(v[0], v[1]); in fadst4_sse2()
135 u[2] = _mm_add_epi32(v[3], v[4]); in fadst4_sse2()
139 u[6] = _mm_add_epi32(u[3], u[5]); in fadst4_sse2()
141 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); in fadst4_sse2()
142 v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); in fadst4_sse2()
143 v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); in fadst4_sse2()
[all …]
/external/libopus/celt/x86/
Dpitch_sse4_1.c70 acc1 = _mm_add_epi32(acc1, inVec1_76543210); in celt_inner_prod_sse4_1()
71 acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); in celt_inner_prod_sse4_1()
74 acc1 = _mm_add_epi32(acc1, acc2); in celt_inner_prod_sse4_1()
83 acc1 = _mm_add_epi32(acc1, inVec1_76543210); in celt_inner_prod_sse4_1()
94 acc1 = _mm_add_epi32(acc1, inVec1_3210); in celt_inner_prod_sse4_1()
98 acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); in celt_inner_prod_sse4_1()
99 acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); in celt_inner_prod_sse4_1()
135 sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); in xcorr_kernel_sse4_1()
136 sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); in xcorr_kernel_sse4_1()
137 sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); in xcorr_kernel_sse4_1()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul32-ld32-x32.c55 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
56 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
57 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
58 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
59 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
60 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
61 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
62 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
64 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
65 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
Dminmax-sse41-mul32-ld32-x24.c51 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
52 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
53 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
54 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
55 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
56 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
58 vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
59 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
60 vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vy89AB, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
61 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vyCDEF, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
[all …]
Dminmax-sse41-mul16-ld64-x32.c84 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
85 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
86 …__m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
87 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
88 …__m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
89 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
90 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
91 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
93 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
94 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dhighbd_idct32x32_add_sse4.c36 step1[8] = _mm_add_epi32(step2[8], step2[11]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
37 step1[9] = _mm_add_epi32(step2[9], step2[10]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
42 step1[14] = _mm_add_epi32(step2[14], step2[13]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
43 step1[15] = _mm_add_epi32(step2[15], step2[12]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
61 step2[16] = _mm_add_epi32(step1[16], step1[19]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
62 step2[17] = _mm_add_epi32(step1[17], step1[18]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
67 step2[22] = _mm_add_epi32(step1[22], step1[21]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
68 step2[23] = _mm_add_epi32(step1[23], step1[20]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
70 step2[24] = _mm_add_epi32(step1[24], step1[27]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
71 step2[25] = _mm_add_epi32(step1[25], step1[26]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
[all …]
Dfwd_dct32x32_impl_sse2.h422 const __m128i s2_20_4 = _mm_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
423 const __m128i s2_20_5 = _mm_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
424 const __m128i s2_21_4 = _mm_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
425 const __m128i s2_21_5 = _mm_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
426 const __m128i s2_22_4 = _mm_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
427 const __m128i s2_22_5 = _mm_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
428 const __m128i s2_23_4 = _mm_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
429 const __m128i s2_23_5 = _mm_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
430 const __m128i s2_24_4 = _mm_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
431 const __m128i s2_24_5 = _mm_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING); in FDCT32x32_2D()
[all …]
Dhighbd_idct32x32_add_sse2.c32 step1[8] = _mm_add_epi32(step2[8], step2[11]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
33 step1[9] = _mm_add_epi32(step2[9], step2[10]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
38 step1[14] = _mm_add_epi32(step2[14], step2[13]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
39 step1[15] = _mm_add_epi32(step2[15], step2[12]); in highbd_idct32_4x32_quarter_2_stage_4_to_6()
57 step2[16] = _mm_add_epi32(step1[16], step1[19]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
58 step2[17] = _mm_add_epi32(step1[17], step1[18]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
63 step2[22] = _mm_add_epi32(step1[21], step1[22]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
64 step2[23] = _mm_add_epi32(step1[20], step1[23]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
66 step2[24] = _mm_add_epi32(step1[27], step1[24]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
67 step2[25] = _mm_add_epi32(step1[26], step1[25]); in highbd_idct32_4x32_quarter_3_4_stage_4_to_7()
[all …]
/external/flac/src/libFLAC/
Dlpc_intrin_sse2.c84 …mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
85 …mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
86 …mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
87 …mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
88 …mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
89 …mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
90 …mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
91 …mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
92 …mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
93 …mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
[all …]
/external/libavc/common/x86/
Dih264_iquant_itrans_recon_sse42.c160 temp4 = _mm_add_epi32(temp4, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42()
161 temp5 = _mm_add_epi32(temp5, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42()
162 temp6 = _mm_add_epi32(temp6, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42()
163 temp7 = _mm_add_epi32(temp7, add_rshift); in ih264_iquant_itrans_recon_4x4_sse42()
194 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_sse42()
202 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_sse42()
205 resq_r0 = _mm_add_epi32(temp0, temp3); in ih264_iquant_itrans_recon_4x4_sse42()
207 resq_r1 = _mm_add_epi32(temp1, temp2); in ih264_iquant_itrans_recon_4x4_sse42()
246 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_sse42()
254 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_sse42()
[all …]
Dih264_iquant_itrans_recon_ssse3.c161 temp4 = _mm_add_epi32(temp4, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3()
162 temp5 = _mm_add_epi32(temp5, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3()
163 temp6 = _mm_add_epi32(temp6, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3()
164 temp7 = _mm_add_epi32(temp7, add_rshift); in ih264_iquant_itrans_recon_4x4_ssse3()
201 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_ssse3()
209 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_ssse3()
212 resq_r0 = _mm_add_epi32(temp0, temp3); in ih264_iquant_itrans_recon_4x4_ssse3()
214 resq_r1 = _mm_add_epi32(temp1, temp2); in ih264_iquant_itrans_recon_4x4_ssse3()
260 temp0 = _mm_add_epi32(resq_r0, resq_r2); in ih264_iquant_itrans_recon_4x4_ssse3()
268 temp3 = _mm_add_epi32(temp3, resq_r1); in ih264_iquant_itrans_recon_4x4_ssse3()
[all …]
/external/XNNPACK/src/qs8-dwconv/gen/
Dup24x9-minmax-sse41-mul16.c113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
[all …]
Dup16x9-minmax-sse41-mul16.c105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x4c2-xw-minmax-sse41.c85 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
87 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
89 vacc2x0123 = _mm_add_epi32(vacc2x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
91 vacc3x0123 = _mm_add_epi32(vacc3x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
95 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
97 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
99 vacc2x0123 = _mm_add_epi32(vacc2x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
101 vacc3x0123 = _mm_add_epi32(vacc3x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
105 vacc0x0123 = _mm_add_epi32(vacc0x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
107 vacc1x0123 = _mm_add_epi32(vacc1x0123, in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-sse41-mul32-ld32-x32.c35 …vzero_point_product = _mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->s… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
48 __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
49 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
50 __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
51 __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
52 __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
53 __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
54 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
55 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
57 …const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]

12345678910>>...19