/external/libvpx/libvpx/vp9/common/x86/
D | vp9_highbd_iht4x4_add_sse4.c |
    29  s0[0] = _mm_mul_epi32(pair_c1, temp[0]);  in highbd_iadst4_sse4_1()
    30  s0[1] = _mm_mul_epi32(pair_c1, temp[1]);  in highbd_iadst4_sse4_1()
    31  s1[0] = _mm_mul_epi32(pair_c2, temp[0]);  in highbd_iadst4_sse4_1()
    32  s1[1] = _mm_mul_epi32(pair_c2, temp[1]);  in highbd_iadst4_sse4_1()
    35  s2[0] = _mm_mul_epi32(pair_c3, temp[0]);  in highbd_iadst4_sse4_1()
    36  s2[1] = _mm_mul_epi32(pair_c3, temp[1]);  in highbd_iadst4_sse4_1()
    39  s3[0] = _mm_mul_epi32(pair_c4, temp[0]);  in highbd_iadst4_sse4_1()
    40  s3[1] = _mm_mul_epi32(pair_c4, temp[1]);  in highbd_iadst4_sse4_1()
    41  s4[0] = _mm_mul_epi32(pair_c1, temp[0]);  in highbd_iadst4_sse4_1()
    42  s4[1] = _mm_mul_epi32(pair_c1, temp[1]);  in highbd_iadst4_sse4_1()
    [all …]
|
D | vp9_highbd_iht8x8_add_sse4.c |
    25  s[0] = _mm_mul_epi32(pair_c, x[0]);  in highbd_iadst_half_butterfly_sse4_1()
    26  s[1] = _mm_mul_epi32(pair_c, x[1]);  in highbd_iadst_half_butterfly_sse4_1()
    41  t00[0] = _mm_mul_epi32(pair_c0, x0[0]);  in highbd_iadst_butterfly_sse4_1()
    42  t00[1] = _mm_mul_epi32(pair_c0, x0[1]);  in highbd_iadst_butterfly_sse4_1()
    43  t01[0] = _mm_mul_epi32(pair_c0, x1[0]);  in highbd_iadst_butterfly_sse4_1()
    44  t01[1] = _mm_mul_epi32(pair_c0, x1[1]);  in highbd_iadst_butterfly_sse4_1()
    45  t10[0] = _mm_mul_epi32(pair_c1, x0[0]);  in highbd_iadst_butterfly_sse4_1()
    46  t10[1] = _mm_mul_epi32(pair_c1, x0[1]);  in highbd_iadst_butterfly_sse4_1()
    47  t11[0] = _mm_mul_epi32(pair_c1, x1[0]);  in highbd_iadst_butterfly_sse4_1()
    48  t11[1] = _mm_mul_epi32(pair_c1, x1[1]);  in highbd_iadst_butterfly_sse4_1()
|
D | vp9_highbd_iht16x16_add_sse4.c |
    25  s[0] = _mm_mul_epi32(pair_c, x[0]);  in highbd_iadst_half_butterfly_sse4_1()
    26  s[1] = _mm_mul_epi32(pair_c, x[1]);  in highbd_iadst_half_butterfly_sse4_1()
    41  t00[0] = _mm_mul_epi32(pair_c0, x0[0]);  in highbd_iadst_butterfly_sse4_1()
    42  t00[1] = _mm_mul_epi32(pair_c0, x0[1]);  in highbd_iadst_butterfly_sse4_1()
    43  t01[0] = _mm_mul_epi32(pair_c0, x1[0]);  in highbd_iadst_butterfly_sse4_1()
    44  t01[1] = _mm_mul_epi32(pair_c0, x1[1]);  in highbd_iadst_butterfly_sse4_1()
    45  t10[0] = _mm_mul_epi32(pair_c1, x0[0]);  in highbd_iadst_butterfly_sse4_1()
    46  t10[1] = _mm_mul_epi32(pair_c1, x0[1]);  in highbd_iadst_butterfly_sse4_1()
    47  t11[0] = _mm_mul_epi32(pair_c1, x1[0]);  in highbd_iadst_butterfly_sse4_1()
    48  t11[1] = _mm_mul_epi32(pair_c1, x1[1]);  in highbd_iadst_butterfly_sse4_1()
|
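All three high-bitdepth inverse-transform files above use the same trick: _mm_mul_epi32 multiplies only the signed 32-bit values in lanes 0 and 2 of each operand and returns two sign-extended 64-bit products, so the coefficients are first spread across the even lanes of two registers, then multiplied by a broadcast cosine constant. Below is a minimal self-contained sketch of that spread-and-multiply step, not the libvpx code itself; the constant 15137 merely stands in for a cospi value and the input array is made up.

    #include <smmintrin.h>  /* SSE4.1; compile with -msse4.1 */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int32_t coeffs[4] = { 200000, -30000, 70000, 1 };
      const __m128i pair_c1 = _mm_set1_epi32(15137); /* stand-in constant */
      const __m128i v = _mm_loadu_si128((const __m128i *)coeffs);

      /* Spread: even lanes of temp[0] hold coeffs[0..1], even lanes of
       * temp[1] hold coeffs[2..3]; odd lanes are ignored by _mm_mul_epi32. */
      __m128i temp[2], s0[2];
      temp[0] = _mm_shuffle_epi32(v, _MM_SHUFFLE(1, 1, 0, 0));
      temp[1] = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 2, 2));

      /* Two 64-bit products per register: 200000 * 15137 would overflow
       * int32, which is why the 10/12-bit paths widen here. */
      s0[0] = _mm_mul_epi32(pair_c1, temp[0]);
      s0[1] = _mm_mul_epi32(pair_c1, temp[1]);

      int64_t r[4];
      _mm_storeu_si128((__m128i *)&r[0], s0[0]);
      _mm_storeu_si128((__m128i *)&r[2], s0[1]);
      for (int i = 0; i < 4; i++) printf("%lld\n", (long long)r[i]);
      return 0;
    }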
/external/flac/src/libFLAC/ |
D | lpc_intrin_sse41.c |
    90   xmm7 = _mm_mul_epi32(xmm7, xmm5);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    96   xmm6 = _mm_mul_epi32(xmm6, xmm4);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    103  xmm6 = _mm_mul_epi32(xmm6, xmm3);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    110  xmm6 = _mm_mul_epi32(xmm6, xmm2);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    117  xmm6 = _mm_mul_epi32(xmm6, xmm1);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    124  xmm6 = _mm_mul_epi32(xmm6, xmm0);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    150  xmm7 = _mm_mul_epi32(xmm7, xmm5);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    156  xmm6 = _mm_mul_epi32(xmm6, xmm4);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    163  xmm6 = _mm_mul_epi32(xmm6, xmm3);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    170  xmm6 = _mm_mul_epi32(xmm6, xmm2);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
    [all …]
|
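In FLAC's wide-residual kernel the 64-bit products feed an _mm_add_epi64 accumulator so that high-order QLP predictions cannot overflow 32 bits. The sketch below shows just that accumulate step as a hypothetical dot_wide helper, not the FLAC function (which also carries the prediction history and the quantization shift); the odd lanes are shifted down into even positions because _mm_mul_epi32 only reads the low dword of each 64-bit lane.

    #include <smmintrin.h>
    #include <stdint.h>

    /* 64-bit-exact dot product of two int32 arrays. */
    static int64_t dot_wide(const int32_t *a, const int32_t *b, int n) {
      __m128i acc = _mm_setzero_si128();
      int i = 0;
      for (; i + 4 <= n; i += 4) {
        const __m128i va = _mm_loadu_si128((const __m128i *)(a + i));
        const __m128i vb = _mm_loadu_si128((const __m128i *)(b + i));
        /* even lanes (0 and 2) */
        acc = _mm_add_epi64(acc, _mm_mul_epi32(va, vb));
        /* odd lanes, moved into the even positions first */
        acc = _mm_add_epi64(acc, _mm_mul_epi32(_mm_srli_epi64(va, 32),
                                               _mm_srli_epi64(vb, 32)));
      }
      int64_t lanes[2];
      _mm_storeu_si128((__m128i *)lanes, acc);
      int64_t sum = lanes[0] + lanes[1];
      for (; i < n; i++) sum += (int64_t)a[i] * b[i]; /* scalar tail */
      return sum;
    }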
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_inv_txfm_sse4.h |
    24  t0 = _mm_mul_epi32(in[0], pair_c);  in multiplication_round_shift_sse4_1()
    25  t1 = _mm_mul_epi32(in[1], pair_c);  in multiplication_round_shift_sse4_1()
    42  temp1[2] = _mm_mul_epi32(temp1[0], pair_c1);  in highbd_butterfly_sse4_1()
    43  temp1[3] = _mm_mul_epi32(temp1[1], pair_c1);  in highbd_butterfly_sse4_1()
    44  temp1[0] = _mm_mul_epi32(temp1[0], pair_c0);  in highbd_butterfly_sse4_1()
    45  temp1[1] = _mm_mul_epi32(temp1[1], pair_c0);  in highbd_butterfly_sse4_1()
    46  temp2[2] = _mm_mul_epi32(temp2[0], pair_c0);  in highbd_butterfly_sse4_1()
    47  temp2[3] = _mm_mul_epi32(temp2[1], pair_c0);  in highbd_butterfly_sse4_1()
    48  temp2[0] = _mm_mul_epi32(temp2[0], pair_c1);  in highbd_butterfly_sse4_1()
    49  temp2[1] = _mm_mul_epi32(temp2[1], pair_c1);  in highbd_butterfly_sse4_1()
|
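The multiply here is followed by libvpx's DCT rounding shift: t = (in * c + 2^(DCT_CONST_BITS-1)) >> DCT_CONST_BITS, with DCT_CONST_BITS = 14 (vpx_dsp/txfm_common.h). A minimal sketch of that step follows. SSE4.1 has no 64-bit arithmetic right shift, so this sketch finishes in scalar for clarity; the real header repacks the shifted lanes with shuffles instead.

    #include <smmintrin.h>
    #include <stdint.h>

    #define DCT_CONST_BITS 14 /* as in vpx_dsp/txfm_common.h */

    /* Sketch of the multiplication_round_shift pattern: multiply the even
     * lanes of in[0]/in[1] by a broadcast constant at 64-bit precision,
     * add the rounding bias, shift back down. */
    static void mul_round_shift(const __m128i in[2], int c, int32_t out[4]) {
      const __m128i pair_c = _mm_set1_epi32(c);
      const __m128i rounding = _mm_set1_epi64x(1 << (DCT_CONST_BITS - 1));
      const __m128i t0 = _mm_add_epi64(_mm_mul_epi32(in[0], pair_c), rounding);
      const __m128i t1 = _mm_add_epi64(_mm_mul_epi32(in[1], pair_c), rounding);
      int64_t t[4];
      _mm_storeu_si128((__m128i *)&t[0], t0);
      _mm_storeu_si128((__m128i *)&t[2], t1);
      for (int i = 0; i < 4; i++) out[i] = (int32_t)(t[i] >> DCT_CONST_BITS);
    }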
/external/XNNPACK/src/qs8-requantization/ |
D | q31-sse4.c |
    67  const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    68  const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    69  const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    70  const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    72  const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    73  const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    74  const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
    75  const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding);  in xnn_qs8_requantize_q31__sse4()
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-sse4.c |
    67  const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    68  const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    69  const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    70  const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    72  const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    73  const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    74  const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
    75  const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding);  in xnn_qu8_requantize_q31__sse4()
|
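Both the qs8 and qu8 requantizers compute ((int64)x * multiplier + 2^30) with _mm_mul_epi32 on the even lanes and on an odd-lane copy (the *_rev registers, made with _mm_shuffle_epi32). The sketch below shows the rounding multiply plus one way to shift the 64-bit products back down by 31 and re-interleave them; the generated kernels may sequence this differently, and the full requantizer also applies a remainder correction not shown here.

    #include <smmintrin.h>
    #include <stdint.h>

    /* Sketch: Q31 rounding multiply of four int32 lanes.  The value of
     * (x * m + 2^30) >> 31 fits in int32, so the 32-bit field starting at
     * bit 31 of each 64-bit product is the answer: shift the even products
     * right by 31 (answer lands in lanes 0/2) and the odd products left by
     * 1 (answer lands in lanes 1/3), then blend the dwords together. */
    static __m128i q31_mul_round(__m128i x, __m128i vmultiplier) {
      const __m128i vq31rounding = _mm_set1_epi64x(INT64_C(1) << 30);
      const __m128i x_rev = _mm_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1));
      const __m128i prod_even =
          _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding);
      const __m128i prod_odd =
          _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding);
      return _mm_blend_epi16(_mm_srli_epi64(prod_even, 31),
                             _mm_slli_epi64(prod_odd, 1), 0xCC);
    }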
/external/pdfium/third_party/libopenjpeg20/ |
D | mct.c |
    246  lo = _mm_mul_epi32(lo, ry);  in opj_mct_encode_real()
    247  hi = _mm_mul_epi32(hi, ry);  in opj_mct_encode_real()
    256  lo = _mm_mul_epi32(lo, gy);  in opj_mct_encode_real()
    257  hi = _mm_mul_epi32(hi, gy);  in opj_mct_encode_real()
    266  lo = _mm_mul_epi32(lo, by);  in opj_mct_encode_real()
    267  hi = _mm_mul_epi32(hi, by);  in opj_mct_encode_real()
    291  lo = _mm_mul_epi32(lo, ru);  in opj_mct_encode_real()
    292  hi = _mm_mul_epi32(hi, ru);  in opj_mct_encode_real()
    301  lo = _mm_mul_epi32(lo, gu);  in opj_mct_encode_real()
    302  hi = _mm_mul_epi32(hi, gu);  in opj_mct_encode_real()
    [all …]
|
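Here the lo/hi registers hold four pixel values split across even lanes, and ry, gy, by, ru, gu are fixed-point color-transform weights. The sketch below assumes opj_int_fix_mul-style Q13 arithmetic, (a * b + 2^12) >> 13, and a hypothetical helper name; it finishes the shift in scalar rather than mirroring the shuffle/blend repack of the real function.

    #include <smmintrin.h>
    #include <stdint.h>

    /* Sketch of the lo/hi pattern in opj_mct_encode_real: each half holds
     * two values in its even lanes; multiply by a broadcast weight at
     * 64-bit precision, round, shift back down by 13. */
    static void fix_mul_q13(__m128i lo, __m128i hi, int32_t w, int32_t out[4]) {
      const __m128i vw = _mm_set1_epi32(w);
      const __m128i round = _mm_set1_epi64x(1 << 12);
      const __m128i plo = _mm_add_epi64(_mm_mul_epi32(lo, vw), round);
      const __m128i phi = _mm_add_epi64(_mm_mul_epi32(hi, vw), round);
      int64_t t[4];
      _mm_storeu_si128((__m128i *)&t[0], plo);
      _mm_storeu_si128((__m128i *)&t[2], phi);
      for (int i = 0; i < 4; i++) out[i] = (int32_t)(t[i] >> 13);
    }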
/external/libopus/silk/fixed/x86/ |
D | prefilter_FIX_sse.c |
    91   … xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    92   xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    100  xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    101  xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
|
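The "only 2 pairs" comment in the listing is the key: one _mm_mul_epi32 yields products for lanes 0 and 2 only, which is why the SILK filter keeps reversed coefficient copies (coef_Q13_3210, coef_Q13_7654) to reach the other two pairs. A minimal sketch of that idea, with a hypothetical helper name; the real filter interleaves this with its state updates.

    #include <smmintrin.h>

    /* Produce 64-bit products for all four coefficient/state pairs using
     * two multiplies: the second operates on fully reversed registers, so
     * pairs 3 and 1 land in its even lanes. */
    static void mul_all_four_pairs(__m128i coef_0123, __m128i state_0123,
                                   __m128i *prod_02, __m128i *prod_31) {
      *prod_02 = _mm_mul_epi32(coef_0123, state_0123); /* pairs 0 and 2 */
      *prod_31 = _mm_mul_epi32(
          _mm_shuffle_epi32(coef_0123, _MM_SHUFFLE(0, 1, 2, 3)),
          _mm_shuffle_epi32(state_0123, _MM_SHUFFLE(0, 1, 2, 3))); /* 3 and 1 */
    }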
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x4c2-minmax-xop-ld128.c |
    202  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    203  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    204  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    205  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    207  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    208  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    209  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
    210  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
|
D | 4x4c2-minmax-xop-ld64.c |
    202  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    203  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    204  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    205  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    207  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    208  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    209  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
    210  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
|
D | 4x4c2-xw-minmax-xop.c |
    195  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    196  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    197  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    198  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    200  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    201  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    202  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
    203  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop()
|
D | 4x4c2-xw-minmax-sse41.c |
    190  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    191  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    192  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    193  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    195  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    196  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    197  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
    198  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
|
D | 3x4c8-minmax-xop-ld128.c |
    132  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
    133  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
    134  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
    136  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
    137  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
    138  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 3x4c8-minmax-sse41-ld128.c |
    127  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
    128  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
    129  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
    131  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
    132  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
    133  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
|
D | 3x4c8-minmax-xop-ld64.c |
    134  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
    135  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
    136  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
    138  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
    139  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
    140  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-xw-minmax-xop.c |
    130  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
    131  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
    132  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
    134  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
    135  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
    136  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
|
D | 3x4c8-minmax-sse41-ld64.c |
    129  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
    130  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
    131  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
    133  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
    134  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
    135  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
|
D | 3x4c8-xw-minmax-sse41.c |
    125  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
    126  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
    127  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
    129  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
    130  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
    131  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c8-minmax-xop-ld64.c |
    151  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
    152  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
    153  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
    155  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
    156  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
    157  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-minmax-xop-ld128.c |
    149  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
    150  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
    151  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
    153  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
    154  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
    155  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 4x4c2-minmax-sse41-ld128.c |
    216  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    217  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    218  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    219  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    221  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    222  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    223  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
    224  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
|
D | 4x4c2-minmax-sse41-ld64.c |
    216  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    217  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    218  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    219  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    221  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    222  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    223  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
    224  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
|
D | 4x4c2-minmax-xop-ld128.c |
    221  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    222  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    223  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    224  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    226  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    227  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    228  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
    229  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128()
|
D | 4x4c2-minmax-xop-ld64.c |
    221  const __m128i vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    222  const __m128i vprod1x02 = _mm_add_epi64(_mm_mul_epi32(vacc1x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    223  const __m128i vprod2x02 = _mm_add_epi64(_mm_mul_epi32(vacc2x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    224  const __m128i vprod3x02 = _mm_add_epi64(_mm_mul_epi32(vacc3x0123, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    226  const __m128i vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    227  const __m128i vprod1x13 = _mm_add_epi64(_mm_mul_epi32(vacc1x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    228  const __m128i vprod2x13 = _mm_add_epi64(_mm_mul_epi32(vacc2x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
    229  const __m128i vprod3x13 = _mm_add_epi64(_mm_mul_epi32(vacc3x1133, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
|
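All of the qs8 GEMM and IGEMM tails above apply the same Q31 requantization as the standalone requantizers earlier in this listing: each vaccNx0123 register holds four int32 accumulators, and the vaccNx1133 operand is, per its name, the odd lanes duplicated into even positions so _mm_mul_epi32 can reach them. The sketch below shows one row's worth of that step with a hypothetical helper; the duplication shuffle itself is not visible in the excerpts, so treat its exact form as an assumption.

    #include <smmintrin.h>
    #include <stdint.h>

    /* Sketch of one requantization tail step: 64-bit products for the
     * even and odd accumulator lanes, each biased with the Q31 rounding
     * constant before the later shift back to 32 bits. */
    static void requant_products(__m128i vacc0x0123, __m128i vmultiplier,
                                 __m128i vrounding,
                                 __m128i *vprod0x02, __m128i *vprod0x13) {
      /* Assumed formation of vacc0x1133: odd lanes copied to even slots. */
      const __m128i vacc0x1133 =
          _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
      *vprod0x02 = _mm_add_epi64(_mm_mul_epi32(vacc0x0123, vmultiplier), vrounding);
      *vprod0x13 = _mm_add_epi64(_mm_mul_epi32(vacc0x1133, vmultiplier), vrounding);
    }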