/external/rust/crates/quiche/deps/boringssl/src/crypto/poly1305/ |
D | poly1305_vec.c |
    232  st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    233  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_first_block()
    235  st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    236  st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_first_block()
    312  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    313  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    315  M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    316  M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_blocks()
    376  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    377  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    [all …]
|
/external/boringssl/src/crypto/poly1305/ |
D | poly1305_vec.c |
    236  st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    237  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_first_block()
    239  st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    240  st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_first_block()
    316  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    317  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    319  M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    320  M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_blocks()
    380  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    381  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    [all …]
|
/external/rust/crates/ring/crypto/poly1305/ |
D | poly1305_vec.c |
    240  st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    241  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_first_block()
    243  st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_first_block()
    244  st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_first_block()
    320  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    321  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    323  M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    324  M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);  in poly1305_blocks()
    384  M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));  in poly1305_blocks()
    385  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));  in poly1305_blocks()
    [all …]
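
The three Poly1305 hits above are the same vectorized implementation vendored in three places (quiche's BoringSSL copy, BoringSSL itself, and ring). There, `_mm_srli_epi64` plus a 26-bit mask splits packed 64-bit message words into the 26-bit limbs of the radix-2^26 representation, and HIBIT sets the 2^128 padding bit in the top limb. A scalar sketch of the same unpacking for one 16-byte block (illustrative only; the vendored code does two blocks at a time in `__m128i` lanes):

```c
#include <stdint.h>
#include <string.h>

/* Illustrative scalar analogue of the limb extraction above: split one 16-byte
 * Poly1305 block into five 26-bit limbs and set the 2^128 padding bit. */
static void poly1305_unpack_block(const uint8_t in[16], uint32_t h[5]) {
  uint64_t lo, hi;
  memcpy(&lo, in, 8);        /* assumes a little-endian host, as the SSE code does */
  memcpy(&hi, in + 8, 8);
  h[0] = (uint32_t)(lo & 0x3ffffff);                        /* bits   0..25  */
  h[1] = (uint32_t)((lo >> 26) & 0x3ffffff);                /* bits  26..51  */
  h[2] = (uint32_t)(((lo >> 52) | (hi << 12)) & 0x3ffffff); /* bits  52..77  */
  h[3] = (uint32_t)((hi >> 14) & 0x3ffffff);                /* bits  78..103 */
  h[4] = (uint32_t)(hi >> 40) | (1u << 24);                 /* bits 104..127, plus 2^128 */
}
```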
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-ssse3.c |
    117  const __m128i x_q31product_even = _mm_srli_epi64(x_rounded_product_even, 31);  in xnn_qu8_requantize_q31__ssse3()
    118  const __m128i x_q31product_odd = _mm_srli_epi64(x_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__ssse3()
    119  const __m128i y_q31product_even = _mm_srli_epi64(y_rounded_product_even, 31);  in xnn_qu8_requantize_q31__ssse3()
    120  const __m128i y_q31product_odd = _mm_srli_epi64(y_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__ssse3()
    121  const __m128i z_q31product_even = _mm_srli_epi64(z_rounded_product_even, 31);  in xnn_qu8_requantize_q31__ssse3()
    122  const __m128i z_q31product_odd = _mm_srli_epi64(z_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__ssse3()
    123  const __m128i w_q31product_even = _mm_srli_epi64(w_rounded_product_even, 31);  in xnn_qu8_requantize_q31__ssse3()
    124  const __m128i w_q31product_odd = _mm_srli_epi64(w_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__ssse3()
|
D | q31-sse2.c |
    117  const __m128i x_q31product_even = _mm_srli_epi64(x_rounded_product_even, 31);  in xnn_qu8_requantize_q31__sse2()
    118  const __m128i x_q31product_odd = _mm_srli_epi64(x_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__sse2()
    119  const __m128i y_q31product_even = _mm_srli_epi64(y_rounded_product_even, 31);  in xnn_qu8_requantize_q31__sse2()
    120  const __m128i y_q31product_odd = _mm_srli_epi64(y_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__sse2()
    121  const __m128i z_q31product_even = _mm_srli_epi64(z_rounded_product_even, 31);  in xnn_qu8_requantize_q31__sse2()
    122  const __m128i z_q31product_odd = _mm_srli_epi64(z_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__sse2()
    123  const __m128i w_q31product_even = _mm_srli_epi64(w_rounded_product_even, 31);  in xnn_qu8_requantize_q31__sse2()
    124  const __m128i w_q31product_odd = _mm_srli_epi64(w_rounded_product_odd, 31);  in xnn_qu8_requantize_q31__sse2()
|
D | q31-sse4.c |
    77  const __m128i x_q31product_even = _mm_srli_epi64(x_product_even, 31);  in xnn_qu8_requantize_q31__sse4()
    79  const __m128i y_q31product_even = _mm_srli_epi64(y_product_even, 31);  in xnn_qu8_requantize_q31__sse4()
    81  const __m128i z_q31product_even = _mm_srli_epi64(z_product_even, 31);  in xnn_qu8_requantize_q31__sse4()
    83  const __m128i w_q31product_even = _mm_srli_epi64(w_product_even, 31);  in xnn_qu8_requantize_q31__sse4()
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-ssse3.c |
    117  const __m128i x_q31product_even = _mm_srli_epi64(x_rounded_product_even, 31);  in xnn_qs8_requantize_q31__ssse3()
    118  const __m128i x_q31product_odd = _mm_srli_epi64(x_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__ssse3()
    119  const __m128i y_q31product_even = _mm_srli_epi64(y_rounded_product_even, 31);  in xnn_qs8_requantize_q31__ssse3()
    120  const __m128i y_q31product_odd = _mm_srli_epi64(y_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__ssse3()
    121  const __m128i z_q31product_even = _mm_srli_epi64(z_rounded_product_even, 31);  in xnn_qs8_requantize_q31__ssse3()
    122  const __m128i z_q31product_odd = _mm_srli_epi64(z_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__ssse3()
    123  const __m128i w_q31product_even = _mm_srli_epi64(w_rounded_product_even, 31);  in xnn_qs8_requantize_q31__ssse3()
    124  const __m128i w_q31product_odd = _mm_srli_epi64(w_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__ssse3()
|
D | q31-sse2.c |
    117  const __m128i x_q31product_even = _mm_srli_epi64(x_rounded_product_even, 31);  in xnn_qs8_requantize_q31__sse2()
    118  const __m128i x_q31product_odd = _mm_srli_epi64(x_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__sse2()
    119  const __m128i y_q31product_even = _mm_srli_epi64(y_rounded_product_even, 31);  in xnn_qs8_requantize_q31__sse2()
    120  const __m128i y_q31product_odd = _mm_srli_epi64(y_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__sse2()
    121  const __m128i z_q31product_even = _mm_srli_epi64(z_rounded_product_even, 31);  in xnn_qs8_requantize_q31__sse2()
    122  const __m128i z_q31product_odd = _mm_srli_epi64(z_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__sse2()
    123  const __m128i w_q31product_even = _mm_srli_epi64(w_rounded_product_even, 31);  in xnn_qs8_requantize_q31__sse2()
    124  const __m128i w_q31product_odd = _mm_srli_epi64(w_rounded_product_odd, 31);  in xnn_qs8_requantize_q31__sse2()
|
D | q31-sse4.c |
    77  const __m128i x_q31product_even = _mm_srli_epi64(x_product_even, 31);  in xnn_qs8_requantize_q31__sse4()
    79  const __m128i y_q31product_even = _mm_srli_epi64(y_product_even, 31);  in xnn_qs8_requantize_q31__sse4()
    81  const __m128i z_q31product_even = _mm_srli_epi64(z_product_even, 31);  in xnn_qs8_requantize_q31__sse4()
    83  const __m128i w_q31product_even = _mm_srli_epi64(w_product_even, 31);  in xnn_qs8_requantize_q31__sse4()
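
Both the qu8 and qs8 requantization hits use the same idiom: a 32-bit accumulator is multiplied by a Q31 fixed-point multiplier into even- and odd-lane 64-bit products, the products are rounded, and `_mm_srli_epi64(..., 31)` keeps the high 31 bits. A scalar sketch of that scaling step (the helper name is illustrative, and the rounding constant is the usual Q31 half, assumed here rather than taken from the listing):

```c
#include <stdint.h>

/* Scalar view of the Q31 requantization scaling above: multiply, round, and keep
 * the high 31 bits of the 64-bit product.  Assumes arithmetic right shift of
 * negative values; the vector kernels instead apply a logical 64-bit shift to
 * even- and odd-lane products. */
static int32_t q31_requantize_scale(int32_t acc, int32_t multiplier) {
  const int64_t product  = (int64_t)acc * (int64_t)multiplier;
  const int64_t rounding = INT64_C(1) << 30;  /* 0.5 in Q31, assumed */
  return (int32_t)((product + rounding) >> 31);
}
```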
|
/external/webp/src/dsp/ |
D | rescaler_sse2.c |
    152  const __m128i D0 = _mm_srli_epi64(frac, 32);  in RescalerImportRowShrink_SSE2()
    179  const __m128i A2 = _mm_srli_epi64(A0, 32);  in LoadDispatchAndMult_SSE2()
    180  const __m128i A3 = _mm_srli_epi64(A1, 32);  in LoadDispatchAndMult_SSE2()
    210  const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX);  in ProcessRow_SSE2()
    211  const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);  in ProcessRow_SSE2()
    269  const __m128i E0 = _mm_srli_epi64(D0, WEBP_RESCALER_RFIX);  in RescalerExportRowExpand_SSE2()
    270  const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);  in RescalerExportRowExpand_SSE2()
    271  const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);  in RescalerExportRowExpand_SSE2()
    272  const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);  in RescalerExportRowExpand_SSE2()
    305  const __m128i D0 = _mm_srli_epi64(B0, WEBP_RESCALER_RFIX);  // = frac  in RescalerExportRowShrink_SSE2()
    [all …]
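
In the libwebp rescaler the shift count is either 32 (to grab the high half of a 64-bit lane) or WEBP_RESCALER_RFIX, the rescaler's fixed-point precision, so the instruction drops the fractional part of a fixed-point accumulator. A scalar sketch, assuming WEBP_RESCALER_RFIX is 32 as in current libwebp:

```c
#include <stdint.h>

#define RESCALER_RFIX 32  /* stand-in for WEBP_RESCALER_RFIX, assumed to be 32 */

/* Keep only the integer part of a 32.32 fixed-point value, as the
 * _mm_srli_epi64(..., WEBP_RESCALER_RFIX) hits above do per 64-bit lane. */
static uint32_t rescaler_fixed_to_int(uint64_t v) {
  return (uint32_t)(v >> RESCALER_RFIX);
}
```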
|
/external/libopus/silk/fixed/x86/ |
D | prefilter_FIX_sse.c |
    97   xmm_product1 = _mm_srli_epi64( xmm_product1, 16 );  /* >> 16, zero extending works */  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    98   xmm_product2 = _mm_srli_epi64( xmm_product2, 16 );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    103  xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
    104  xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 );  in silk_warped_LPC_analysis_filter_FIX_sse4_1()
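
The SILK hits shift packed 64-bit products right by 16, which is the vector form of SILK's silk_SMULWB fixed-point multiply (a 32-bit value times the bottom 16 bits of another, keeping bits 16..47 of the product); the in-source comment notes that a logical shift is sufficient there. A scalar sketch of that operation:

```c
#include <stdint.h>

/* Scalar form of the >>16 step above: multiply a 32-bit value by the low 16 bits
 * of another and keep bits 16..47 of the product (silk_SMULWB semantics).
 * Assumes arithmetic right shift of negative values. */
static int32_t smulwb(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * (int64_t)(int16_t)b) >> 16);
}
```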
|
/external/XNNPACK/src/qu8-gemm/ |
D | 4x4c2-minmax-sse2.c |
    227  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    228  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    229  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    230  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    247  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    248  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    249  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    250  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    298  *((uint32_t*) c1) = (uint32_t) _mm_cvtsi128_si32(_mm_srli_epi64(vout, 32));  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
|
D | 2x4c8-minmax-sse2.c |
    132  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_2x4c8__sse2()
    133  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_2x4c8__sse2()
    144  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_2x4c8__sse2()
    145  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qu8_gemm_minmax_ukernel_2x4c8__sse2()
    178  *((uint32_t*) c1) = (uint32_t) _mm_cvtsi128_si32(_mm_srli_epi64(vout, 32));  in xnn_qu8_gemm_minmax_ukernel_2x4c8__sse2()
|
/external/XNNPACK/src/qu8-igemm/ |
D | 4x4c2-minmax-sse2.c |
    211  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    212  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    213  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    214  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    231  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    232  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    233  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    234  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    281  *((uint32_t*) c1) = (uint32_t) _mm_cvtsi128_si32(_mm_srli_epi64(vout, 32));  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
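
Besides the rounded Q31 shift already shown for the requantization kernels, the qu8 GEMM/IGEMM hits above (lines 298, 178, and 281) use `_mm_srli_epi64(vout, 32)` when storing: the packed 8-bit outputs for row 1 sit in 32-bit lane 1 of `vout`, and a logical 64-bit shift by 32 moves them into lane 0 so `_mm_cvtsi128_si32` can extract them. A minimal sketch of that store idiom (the helper name is illustrative; memcpy stands in for the kernels' direct unaligned store):

```c
#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <string.h>

/* Store the four packed output bytes of rows 0 and 1 from one __m128i. */
static void store_rows_0_and_1(__m128i vout, uint8_t* c0, uint8_t* c1) {
  const uint32_t row0 = (uint32_t)_mm_cvtsi128_si32(vout);
  const uint32_t row1 = (uint32_t)_mm_cvtsi128_si32(_mm_srli_epi64(vout, 32));
  memcpy(c0, &row0, sizeof(row0));  /* row 0: 32-bit lane 0 */
  memcpy(c1, &row1, sizeof(row1));  /* row 1: 32-bit lane 1, shifted down */
}
```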
|
/external/pdfium/third_party/libopenjpeg20/ |
D | mct.c |
    250  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    260  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    270  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    285  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    295  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    305  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    320  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    330  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
    340  lo = _mm_srli_epi64(lo, 13);  in opj_mct_encode_real()
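
In openjpeg's opj_mct_encode_real() the coefficients of the irreversible RGB-to-YCC transform are kept in Q13 fixed point, so each 64-bit product is shifted right by 13 to drop the fractional bits; the even and odd lanes are handled with `_mm_srli_epi64`/`_mm_slli_epi64` pairs. A scalar sketch of the underlying fixed-point multiply (helper name illustrative):

```c
#include <stdint.h>

/* Multiply a sample by a Q13 fixed-point coefficient and drop the 13 fractional
 * bits, matching the >>13 in the vectorized opj_mct_encode_real() hits above. */
static int32_t fix13_mul(int32_t sample, int32_t coeff_q13) {
  return (int32_t)(((int64_t)sample * (int64_t)coeff_q13) >> 13);
}
```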
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-xw-minmax-ssse3.c |
    145  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    146  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    147  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    161  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    162  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    163  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
|
D | 4x4c2-xw-minmax-sse2.c |
    215  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    216  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    217  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    218  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    235  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    236  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    237  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    238  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
|
D | 4x4c2-minmax-sse2-ld128.c |
    222  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    223  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    224  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    225  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    242  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    243  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    244  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    245  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
|
D | 4x4c2-minmax-sse2-ld64.c |
    222  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    223  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    224  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    225  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    242  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    243  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    244  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    245  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
|
D | 4x4c2-minmax-ssse3-ld64.c |
    222  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    223  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    224  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    225  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    242  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    243  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    244  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    245  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
|
D | 4x4c2-xw-minmax-ssse3.c |
    215  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    216  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    217  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    218  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    235  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    236  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    237  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    238  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
|
D | 4x4c2-minmax-ssse3-ld128.c |
    222  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    223  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    224  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    225  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    242  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    243  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    244  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    245  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x4c2-minmax-sse2-ld128.c |
    241  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    242  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    243  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    244  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    261  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    262  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    263  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    264  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
|
D | 4x4c2-minmax-ssse3-ld64.c |
    241  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    242  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    243  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    244  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    261  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    262  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    263  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    264  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
|
D | 4x4c2-minmax-ssse3-ld128.c |
    241  const __m128i vq31prod0x02 = _mm_srli_epi64(_mm_add_epi64(vprod0x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    242  const __m128i vq31prod1x02 = _mm_srli_epi64(_mm_add_epi64(vprod1x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    243  const __m128i vq31prod2x02 = _mm_srli_epi64(_mm_add_epi64(vprod2x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    244  const __m128i vq31prod3x02 = _mm_srli_epi64(_mm_add_epi64(vprod3x02, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    261  const __m128i vq31prod0x13 = _mm_srli_epi64(_mm_add_epi64(vprod0x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    262  const __m128i vq31prod1x13 = _mm_srli_epi64(_mm_add_epi64(vprod1x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    263  const __m128i vq31prod2x13 = _mm_srli_epi64(_mm_add_epi64(vprod2x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    264  const __m128i vq31prod3x13 = _mm_srli_epi64(_mm_add_epi64(vprod3x13, vrounding), 31);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
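
Every qs8 GEMM and IGEMM hit above is the same generated rounding step: the even-lane and odd-lane 64-bit products of accumulator times Q31 multiplier are offset by a rounding constant and shifted right by 31, then interleaved back into one vector of four 32-bit results. A hedged SSE2 sketch of that step, reconstructed around the visible `_mm_srli_epi64(_mm_add_epi64(vprod, vrounding), 31)` lines (the recombination shuffles and the assumption that the products are non-negative are mine, not taken from the listing):

```c
#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

/* Round and shift even-lane (0,2) and odd-lane (1,3) 64-bit Q31 products, then
 * interleave them back into a single vector of four 32-bit values.  Sketch only;
 * assumes the products are non-negative so the logical shift is valid. */
static __m128i q31_round_shift(__m128i vprod02, __m128i vprod13) {
  const __m128i vrounding = _mm_set1_epi64x(INT64_C(1) << 30);
  const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);
  const __m128i vq31prod13 = _mm_srli_epi64(_mm_add_epi64(vprod13, vrounding), 31);
  /* Results now sit in 32-bit lanes 0 and 2 of each vector; gather and reorder. */
  const __m128i vq31prod0213 = _mm_castps_si128(_mm_shuffle_ps(
      _mm_castsi128_ps(vq31prod02), _mm_castsi128_ps(vq31prod13), _MM_SHUFFLE(2, 0, 2, 0)));
  return _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
}
```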
|