
Searched refs: _mm_mul_epi32 (Results 1 – 25 of 84) sorted by relevance
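All 25 files below rely on the SSE4.1 PMULDQ instruction behind this intrinsic: it multiplies only the low signed 32-bit element of each 64-bit lane (elements 0 and 2) and returns the two full 64-bit products. A minimal standalone sketch of those semantics (not taken from any of the hits):

#include <smmintrin.h> /* SSE4.1 */
#include <stdio.h>

int main(void) {
  /* _mm_set_epi32 takes elements high-to-low: e3, e2, e1, e0 */
  const __m128i a = _mm_set_epi32(9, -70000, 9, 70000);
  const __m128i b = _mm_set_epi32(9, 70000, 9, 70000);
  const __m128i p = _mm_mul_epi32(a, b); /* lanes 0 and 2 only */
  long long out[2];
  _mm_storeu_si128((__m128i *)out, p);
  printf("%lld %lld\n", out[0], out[1]); /* 4900000000 -4900000000 */
  return 0;
}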


/external/libvpx/libvpx/vp9/common/x86/
vp9_highbd_iht4x4_add_sse4.c
29 s0[0] = _mm_mul_epi32(pair_c1, temp[0]); in highbd_iadst4_sse4_1()
30 s0[1] = _mm_mul_epi32(pair_c1, temp[1]); in highbd_iadst4_sse4_1()
31 s1[0] = _mm_mul_epi32(pair_c2, temp[0]); in highbd_iadst4_sse4_1()
32 s1[1] = _mm_mul_epi32(pair_c2, temp[1]); in highbd_iadst4_sse4_1()
35 s2[0] = _mm_mul_epi32(pair_c3, temp[0]); in highbd_iadst4_sse4_1()
36 s2[1] = _mm_mul_epi32(pair_c3, temp[1]); in highbd_iadst4_sse4_1()
39 s3[0] = _mm_mul_epi32(pair_c4, temp[0]); in highbd_iadst4_sse4_1()
40 s3[1] = _mm_mul_epi32(pair_c4, temp[1]); in highbd_iadst4_sse4_1()
41 s4[0] = _mm_mul_epi32(pair_c1, temp[0]); in highbd_iadst4_sse4_1()
42 s4[1] = _mm_mul_epi32(pair_c1, temp[1]); in highbd_iadst4_sse4_1()
[all …]
vp9_highbd_iht8x8_add_sse4.c
25 s[0] = _mm_mul_epi32(pair_c, x[0]); in highbd_iadst_half_butterfly_sse4_1()
26 s[1] = _mm_mul_epi32(pair_c, x[1]); in highbd_iadst_half_butterfly_sse4_1()
41 t00[0] = _mm_mul_epi32(pair_c0, x0[0]); in highbd_iadst_butterfly_sse4_1()
42 t00[1] = _mm_mul_epi32(pair_c0, x0[1]); in highbd_iadst_butterfly_sse4_1()
43 t01[0] = _mm_mul_epi32(pair_c0, x1[0]); in highbd_iadst_butterfly_sse4_1()
44 t01[1] = _mm_mul_epi32(pair_c0, x1[1]); in highbd_iadst_butterfly_sse4_1()
45 t10[0] = _mm_mul_epi32(pair_c1, x0[0]); in highbd_iadst_butterfly_sse4_1()
46 t10[1] = _mm_mul_epi32(pair_c1, x0[1]); in highbd_iadst_butterfly_sse4_1()
47 t11[0] = _mm_mul_epi32(pair_c1, x1[0]); in highbd_iadst_butterfly_sse4_1()
48 t11[1] = _mm_mul_epi32(pair_c1, x1[1]); in highbd_iadst_butterfly_sse4_1()
vp9_highbd_iht16x16_add_sse4.c
25 s[0] = _mm_mul_epi32(pair_c, x[0]); in highbd_iadst_half_butterfly_sse4_1()
26 s[1] = _mm_mul_epi32(pair_c, x[1]); in highbd_iadst_half_butterfly_sse4_1()
41 t00[0] = _mm_mul_epi32(pair_c0, x0[0]); in highbd_iadst_butterfly_sse4_1()
42 t00[1] = _mm_mul_epi32(pair_c0, x0[1]); in highbd_iadst_butterfly_sse4_1()
43 t01[0] = _mm_mul_epi32(pair_c0, x1[0]); in highbd_iadst_butterfly_sse4_1()
44 t01[1] = _mm_mul_epi32(pair_c0, x1[1]); in highbd_iadst_butterfly_sse4_1()
45 t10[0] = _mm_mul_epi32(pair_c1, x0[0]); in highbd_iadst_butterfly_sse4_1()
46 t10[1] = _mm_mul_epi32(pair_c1, x0[1]); in highbd_iadst_butterfly_sse4_1()
47 t11[0] = _mm_mul_epi32(pair_c1, x1[0]); in highbd_iadst_butterfly_sse4_1()
48 t11[1] = _mm_mul_epi32(pair_c1, x1[1]); in highbd_iadst_butterfly_sse4_1()
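The three vp9 files above share one idiom: each __m128i of transform data is pre-split so the valid int32 values sit in the even lanes, letting two _mm_mul_epi32 calls produce all four 64-bit coefficient products. A hedged sketch of that split (the unpack and the helper name are illustrative assumptions, not libvpx code):

#include <smmintrin.h>

static inline void mul_4x32_by_c(const __m128i in, const int c,
                                 __m128i out[2]) {
  const __m128i pair_c = _mm_set1_epi32(c);
  const __m128i zero = _mm_setzero_si128();
  /* zero-interleave is fine: _mm_mul_epi32 ignores the odd lanes */
  const __m128i lo = _mm_unpacklo_epi32(in, zero); /* in0, 0, in1, 0 */
  const __m128i hi = _mm_unpackhi_epi32(in, zero); /* in2, 0, in3, 0 */
  out[0] = _mm_mul_epi32(pair_c, lo); /* in0*c, in1*c as int64 */
  out[1] = _mm_mul_epi32(pair_c, hi); /* in2*c, in3*c as int64 */
}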
/external/flac/src/libFLAC/
lpc_intrin_sse41.c
90 xmm7 = _mm_mul_epi32(xmm7, xmm5); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
96 xmm6 = _mm_mul_epi32(xmm6, xmm4); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
103 xmm6 = _mm_mul_epi32(xmm6, xmm3); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
110 xmm6 = _mm_mul_epi32(xmm6, xmm2); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
117 xmm6 = _mm_mul_epi32(xmm6, xmm1); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
124 xmm6 = _mm_mul_epi32(xmm6, xmm0); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
150 xmm7 = _mm_mul_epi32(xmm7, xmm5); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
156 xmm6 = _mm_mul_epi32(xmm6, xmm4); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
163 xmm6 = _mm_mul_epi32(xmm6, xmm3); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
170 xmm6 = _mm_mul_epi32(xmm6, xmm2); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
[all …]
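The FLAC hits are the inner loop of the "wide" residual computation, where 32-bit samples times 32-bit QLP coefficients would overflow a 32-bit accumulator. The recurring pattern is a 32×32→64 multiply-accumulate; a hedged sketch with illustrative names:

#include <smmintrin.h>

/* acc += data * coef, two int64 lanes at a time; operands in even lanes */
static inline __m128i mac_32x32_to_64(const __m128i acc, const __m128i data,
                                      const __m128i coef) {
  return _mm_add_epi64(acc, _mm_mul_epi32(data, coef));
}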
/external/libvpx/libvpx/vpx_dsp/x86/
highbd_inv_txfm_sse4.h
24 t0 = _mm_mul_epi32(in[0], pair_c); in multiplication_round_shift_sse4_1()
25 t1 = _mm_mul_epi32(in[1], pair_c); in multiplication_round_shift_sse4_1()
42 temp1[2] = _mm_mul_epi32(temp1[0], pair_c1); in highbd_butterfly_sse4_1()
43 temp1[3] = _mm_mul_epi32(temp1[1], pair_c1); in highbd_butterfly_sse4_1()
44 temp1[0] = _mm_mul_epi32(temp1[0], pair_c0); in highbd_butterfly_sse4_1()
45 temp1[1] = _mm_mul_epi32(temp1[1], pair_c0); in highbd_butterfly_sse4_1()
46 temp2[2] = _mm_mul_epi32(temp2[0], pair_c0); in highbd_butterfly_sse4_1()
47 temp2[3] = _mm_mul_epi32(temp2[1], pair_c0); in highbd_butterfly_sse4_1()
48 temp2[0] = _mm_mul_epi32(temp2[0], pair_c1); in highbd_butterfly_sse4_1()
49 temp2[1] = _mm_mul_epi32(temp2[1], pair_c1); in highbd_butterfly_sse4_1()
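multiplication_round_shift_sse4_1 and highbd_butterfly_sse4_1 both follow the multiply/round/shift idiom for high-bitdepth transforms. A simplified sketch under two assumptions: DCT_CONST_BITS is 14, as in libvpx, and the rounded products are nonnegative (SSE has no 64-bit arithmetic shift, so a logical shift stands in here):

#include <smmintrin.h>

#define BITS 14 /* libvpx DCT_CONST_BITS */

static inline __m128i mul_round_shift(const __m128i in_even,
                                      const __m128i pair_c) {
  const __m128i rounding = _mm_set1_epi64x(1LL << (BITS - 1));
  __m128i t = _mm_mul_epi32(in_even, pair_c); /* 2 x int64 products */
  t = _mm_add_epi64(t, rounding);
  return _mm_srli_epi64(t, BITS); /* logical shift: nonneg products only */
}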
/external/XNNPACK/src/qs8-requantization/
q31-sse4.c
67 const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
68 const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
69 const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
70 const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
72 const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
73 const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
74 const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
75 const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
/external/XNNPACK/src/qu8-requantization/
q31-sse4.c
67 const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
68 const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
69 const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
70 const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
72 const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
73 const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
74 const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
75 const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
/external/pdfium/third_party/libopenjpeg20/
mct.c
246 lo = _mm_mul_epi32(lo, ry); in opj_mct_encode_real()
247 hi = _mm_mul_epi32(hi, ry); in opj_mct_encode_real()
256 lo = _mm_mul_epi32(lo, gy); in opj_mct_encode_real()
257 hi = _mm_mul_epi32(hi, gy); in opj_mct_encode_real()
266 lo = _mm_mul_epi32(lo, by); in opj_mct_encode_real()
267 hi = _mm_mul_epi32(hi, by); in opj_mct_encode_real()
291 lo = _mm_mul_epi32(lo, ru); in opj_mct_encode_real()
292 hi = _mm_mul_epi32(hi, ru); in opj_mct_encode_real()
301 lo = _mm_mul_epi32(lo, gu); in opj_mct_encode_real()
302 hi = _mm_mul_epi32(hi, gu); in opj_mct_encode_real()
[all …]
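In mct.c the pixel row is split into lo/hi halves with the int32 samples in even lanes, then scaled by fixed-point RGB→YUV weights (ry, gy, by, ...). A hedged sketch of one such scale; the Q13 format and nonnegative-sample assumption are mine, not verified against OpenJPEG:

#include <smmintrin.h>

static inline void scale_q13(const __m128i lo, const __m128i hi,
                             const int coef_q13,
                             __m128i *rlo, __m128i *rhi) {
  const __m128i c = _mm_set1_epi32(coef_q13);
  /* 64-bit products, then logical shift (assumes nonnegative samples) */
  *rlo = _mm_srli_epi64(_mm_mul_epi32(lo, c), 13);
  *rhi = _mm_srli_epi64(_mm_mul_epi32(hi, c), 13);
}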
/external/libopus/silk/fixed/x86/
prefilter_FIX_sse.c
91 … xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */ in silk_warped_LPC_analysis_filter_FIX_sse4_1()
92 xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 ); in silk_warped_LPC_analysis_filter_FIX_sse4_1()
100 xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa ); in silk_warped_LPC_analysis_filter_FIX_sse4_1()
101 xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb ); in silk_warped_LPC_analysis_filter_FIX_sse4_1()
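The SILK comment above says it directly: one call covers only two of the four coefficient/state pairs, so the filter also multiplies reversed copies (coef_Q13_3210, coef_Q13_7654) to reach the rest. A hedged sketch of that pairing; the reversal shuffle is an illustrative reconstruction, not Opus code:

#include <smmintrin.h>

static inline void mul_four_pairs(const __m128i coef, const __m128i state,
                                  __m128i *p02, __m128i *p31) {
  const __m128i coef_rev  = _mm_shuffle_epi32(coef,  _MM_SHUFFLE(0, 1, 2, 3));
  const __m128i state_rev = _mm_shuffle_epi32(state, _MM_SHUFFLE(0, 1, 2, 3));
  *p02 = _mm_mul_epi32(coef, state);         /* pairs 0 and 2 */
  *p31 = _mm_mul_epi32(coef_rev, state_rev); /* pairs 3 and 1 */
}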
/external/XNNPACK/src/qs8-gemm/gen/
4x4c2-minmax-xop-ld128.c
202 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
203 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
204 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
205 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
207 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
208 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
209 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
210 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
4x4c2-minmax-xop-ld64.c
202 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
203 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
204 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
205 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
207 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
208 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
209 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
210 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
4x4c2-xw-minmax-xop.c
195 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
196 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
197 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
198 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
200 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
201 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
202 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
203 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
4x4c2-xw-minmax-sse41.c
190 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
191 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
192 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
193 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
195 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
196 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
197 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
198 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
3x4c8-minmax-xop-ld128.c
132 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
133 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
134 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
136 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
137 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
138 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
3x4c8-minmax-sse41-ld128.c
127 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
128 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
129 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
131 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
132 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
133 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
3x4c8-minmax-xop-ld64.c
134 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
135 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
136 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
138 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
139 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
140 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
3x4c8-xw-minmax-xop.c
130 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
131 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
132 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
134 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
135 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
136 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
3x4c8-minmax-sse41-ld64.c
129 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
130 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
131 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
133 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
134 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
135 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
3x4c8-xw-minmax-sse41.c
125 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
126 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
127 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
129 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
130 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
131 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
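Every generated GEMM kernel above repeats the same requantization step per accumulator row: even 64-bit products come straight from vaccNx0123, odd ones from a self-shuffle that duplicates elements 1 and 3 into the even lanes (the vaccNx1133 vectors), and each product adds the rounding term. A hedged sketch; the (3, 3, 1, 1) shuffle is my reconstruction of how vaccNx1133 is formed:

#include <smmintrin.h>

static inline void requant_products(const __m128i vacc0123,
                                    const __m128i vmultiplier,
                                    const __m128i vrounding,
                                    __m128i *vprod02, __m128i *vprod13) {
  const __m128i vacc1133 =
      _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
  *vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
  *vprod13 = _mm_add_epi64(_mm_mul_epi32(vacc1133, vmultiplier), vrounding);
}

The qs8-igemm kernels in the next section reuse this step unchanged.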
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-xop-ld64.c
151 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
152 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
153 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
155 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
156 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
157 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
3x4c8-minmax-xop-ld128.c
149 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
150 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
151 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
153 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
154 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
155 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
4x4c2-minmax-sse41-ld128.c
216 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
217 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
218 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
219 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
221 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
222 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
223 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
224 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
4x4c2-minmax-sse41-ld64.c
216 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
217 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
218 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
219 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
221 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
222 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
223 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
224 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
4x4c2-minmax-xop-ld128.c
221 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
222 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
223 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
224 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
226 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
227 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
228 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
229 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
4x4c2-minmax-xop-ld64.c
221 const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
222 const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
223 const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
224 const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
226 const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
227 const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
228 const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
229 const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
