/external/libvpx/libvpx/vp9/common/x86/
D | vp9_highbd_iht16x16_add_sse4.c |
    52  s1[0] = _mm_sub_epi64(t10[0], t01[0]);  in highbd_iadst_butterfly_sse4_1()
    53  s1[1] = _mm_sub_epi64(t10[1], t01[1]);  in highbd_iadst_butterfly_sse4_1()
    91  x8[0] = _mm_sub_epi64(s0[0], s8[0]);  in highbd_iadst16_4col_sse4_1()
    92  x8[1] = _mm_sub_epi64(s0[1], s8[1]);  in highbd_iadst16_4col_sse4_1()
    93  x9[0] = _mm_sub_epi64(s1[0], s9[0]);  in highbd_iadst16_4col_sse4_1()
    94  x9[1] = _mm_sub_epi64(s1[1], s9[1]);  in highbd_iadst16_4col_sse4_1()
    95  x10[0] = _mm_sub_epi64(s2[0], s10[0]);  in highbd_iadst16_4col_sse4_1()
    96  x10[1] = _mm_sub_epi64(s2[1], s10[1]);  in highbd_iadst16_4col_sse4_1()
    97  x11[0] = _mm_sub_epi64(s3[0], s11[0]);  in highbd_iadst16_4col_sse4_1()
    98  x11[1] = _mm_sub_epi64(s3[1], s11[1]);  in highbd_iadst16_4col_sse4_1()
    [all …]
|
D | vp9_highbd_iht8x8_add_sse4.c |
    52  s1[0] = _mm_sub_epi64(t10[0], t01[0]);  in highbd_iadst_butterfly_sse4_1()
    53  s1[1] = _mm_sub_epi64(t10[1], t01[1]);  in highbd_iadst_butterfly_sse4_1()
    69  x4[0] = _mm_sub_epi64(s0[0], s4[0]);  in highbd_iadst8_sse4_1()
    70  x4[1] = _mm_sub_epi64(s0[1], s4[1]);  in highbd_iadst8_sse4_1()
    71  x5[0] = _mm_sub_epi64(s1[0], s5[0]);  in highbd_iadst8_sse4_1()
    72  x5[1] = _mm_sub_epi64(s1[1], s5[1]);  in highbd_iadst8_sse4_1()
    80  x6[0] = _mm_sub_epi64(s2[0], s6[0]);  in highbd_iadst8_sse4_1()
    81  x6[1] = _mm_sub_epi64(s2[1], s6[1]);  in highbd_iadst8_sse4_1()
    82  x7[0] = _mm_sub_epi64(s3[0], s7[0]);  in highbd_iadst8_sse4_1()
    83  x7[1] = _mm_sub_epi64(s3[1], s7[1]);  in highbd_iadst8_sse4_1()
    [all …]
|
D | vp9_highbd_iht4x4_add_sse4.c |
    54  t1[0] = _mm_sub_epi64(s1[0], s4[0]);  in highbd_iadst4_sse4_1()
    55  t1[1] = _mm_sub_epi64(s1[1], s4[1]);  in highbd_iadst4_sse4_1()
    56  t1[0] = _mm_sub_epi64(t1[0], s6[0]);  in highbd_iadst4_sse4_1()
    57  t1[1] = _mm_sub_epi64(t1[1], s6[1]);  in highbd_iadst4_sse4_1()
    70  s3[0] = _mm_sub_epi64(s3[0], s2[0]);  in highbd_iadst4_sse4_1()
    71  s3[1] = _mm_sub_epi64(s3[1], s2[1]);  in highbd_iadst4_sse4_1()
|
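The libvpx hits above are the subtract halves of butterfly stages in the high-bitdepth inverse ADST kernels: intermediates are kept at 64-bit precision, so each group of four lanes is split across a pair of __m128i registers and every butterfly output costs two _mm_add_epi64/_mm_sub_epi64 calls. A minimal sketch of that shape (the helper name and signature are illustrative, not the libvpx API):

    #include <emmintrin.h>  /* SSE2: _mm_add_epi64, _mm_sub_epi64 */

    /* One 64-bit butterfly step over split register pairs: a[0]/a[1] and
     * b[0]/b[1] each hold two 64-bit lanes of a four-lane value. */
    static void butterfly64_sketch(const __m128i a[2], const __m128i b[2],
                                   __m128i sum[2], __m128i diff[2]) {
      sum[0]  = _mm_add_epi64(a[0], b[0]);  /* lanes 0..1 of a + b */
      sum[1]  = _mm_add_epi64(a[1], b[1]);  /* lanes 2..3 of a + b */
      diff[0] = _mm_sub_epi64(a[0], b[0]);  /* lanes 0..1 of a - b */
      diff[1] = _mm_sub_epi64(a[1], b[1]);  /* lanes 2..3 of a - b */
    }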
/external/XNNPACK/src/qu8-requantization/ |
D | q31-ssse3.c |
    87  …const __m128i x_product_even = _mm_sub_epi64(_mm_xor_si128(x_abs_product_even, x_neg_mask_even), x…  in xnn_qu8_requantize_q31__ssse3()
    88  …const __m128i y_product_even = _mm_sub_epi64(_mm_xor_si128(y_abs_product_even, y_neg_mask_even), y…  in xnn_qu8_requantize_q31__ssse3()
    89  …const __m128i z_product_even = _mm_sub_epi64(_mm_xor_si128(z_abs_product_even, z_neg_mask_even), z…  in xnn_qu8_requantize_q31__ssse3()
    90  …const __m128i w_product_even = _mm_sub_epi64(_mm_xor_si128(w_abs_product_even, w_neg_mask_even), w…  in xnn_qu8_requantize_q31__ssse3()
    107  …const __m128i x_product_odd = _mm_sub_epi64(_mm_xor_si128(x_abs_product_odd, x_neg_mask_odd), x_ne…  in xnn_qu8_requantize_q31__ssse3()
    108  …const __m128i y_product_odd = _mm_sub_epi64(_mm_xor_si128(y_abs_product_odd, y_neg_mask_odd), y_ne…  in xnn_qu8_requantize_q31__ssse3()
    109  …const __m128i z_product_odd = _mm_sub_epi64(_mm_xor_si128(z_abs_product_odd, z_neg_mask_odd), z_ne…  in xnn_qu8_requantize_q31__ssse3()
    110  …const __m128i w_product_odd = _mm_sub_epi64(_mm_xor_si128(w_abs_product_odd, w_neg_mask_odd), w_ne…  in xnn_qu8_requantize_q31__ssse3()
|
D | q31-sse2.c |
    87  …const __m128i x_product_even = _mm_sub_epi64(_mm_xor_si128(x_abs_product_even, x_neg_mask_even), x…  in xnn_qu8_requantize_q31__sse2()
    88  …const __m128i y_product_even = _mm_sub_epi64(_mm_xor_si128(y_abs_product_even, y_neg_mask_even), y…  in xnn_qu8_requantize_q31__sse2()
    89  …const __m128i z_product_even = _mm_sub_epi64(_mm_xor_si128(z_abs_product_even, z_neg_mask_even), z…  in xnn_qu8_requantize_q31__sse2()
    90  …const __m128i w_product_even = _mm_sub_epi64(_mm_xor_si128(w_abs_product_even, w_neg_mask_even), w…  in xnn_qu8_requantize_q31__sse2()
    107  …const __m128i x_product_odd = _mm_sub_epi64(_mm_xor_si128(x_abs_product_odd, x_neg_mask_odd), x_ne…  in xnn_qu8_requantize_q31__sse2()
    108  …const __m128i y_product_odd = _mm_sub_epi64(_mm_xor_si128(y_abs_product_odd, y_neg_mask_odd), y_ne…  in xnn_qu8_requantize_q31__sse2()
    109  …const __m128i z_product_odd = _mm_sub_epi64(_mm_xor_si128(z_abs_product_odd, z_neg_mask_odd), z_ne…  in xnn_qu8_requantize_q31__sse2()
    110  …const __m128i w_product_odd = _mm_sub_epi64(_mm_xor_si128(w_abs_product_odd, w_neg_mask_odd), w_ne…  in xnn_qu8_requantize_q31__sse2()
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-ssse3.c |
    87  …const __m128i x_product_even = _mm_sub_epi64(_mm_xor_si128(x_abs_product_even, x_neg_mask_even), x…  in xnn_qs8_requantize_q31__ssse3()
    88  …const __m128i y_product_even = _mm_sub_epi64(_mm_xor_si128(y_abs_product_even, y_neg_mask_even), y…  in xnn_qs8_requantize_q31__ssse3()
    89  …const __m128i z_product_even = _mm_sub_epi64(_mm_xor_si128(z_abs_product_even, z_neg_mask_even), z…  in xnn_qs8_requantize_q31__ssse3()
    90  …const __m128i w_product_even = _mm_sub_epi64(_mm_xor_si128(w_abs_product_even, w_neg_mask_even), w…  in xnn_qs8_requantize_q31__ssse3()
    107  …const __m128i x_product_odd = _mm_sub_epi64(_mm_xor_si128(x_abs_product_odd, x_neg_mask_odd), x_ne…  in xnn_qs8_requantize_q31__ssse3()
    108  …const __m128i y_product_odd = _mm_sub_epi64(_mm_xor_si128(y_abs_product_odd, y_neg_mask_odd), y_ne…  in xnn_qs8_requantize_q31__ssse3()
    109  …const __m128i z_product_odd = _mm_sub_epi64(_mm_xor_si128(z_abs_product_odd, z_neg_mask_odd), z_ne…  in xnn_qs8_requantize_q31__ssse3()
    110  …const __m128i w_product_odd = _mm_sub_epi64(_mm_xor_si128(w_abs_product_odd, w_neg_mask_odd), w_ne…  in xnn_qs8_requantize_q31__ssse3()
|
D | q31-sse2.c |
    87  …const __m128i x_product_even = _mm_sub_epi64(_mm_xor_si128(x_abs_product_even, x_neg_mask_even), x…  in xnn_qs8_requantize_q31__sse2()
    88  …const __m128i y_product_even = _mm_sub_epi64(_mm_xor_si128(y_abs_product_even, y_neg_mask_even), y…  in xnn_qs8_requantize_q31__sse2()
    89  …const __m128i z_product_even = _mm_sub_epi64(_mm_xor_si128(z_abs_product_even, z_neg_mask_even), z…  in xnn_qs8_requantize_q31__sse2()
    90  …const __m128i w_product_even = _mm_sub_epi64(_mm_xor_si128(w_abs_product_even, w_neg_mask_even), w…  in xnn_qs8_requantize_q31__sse2()
    107  …const __m128i x_product_odd = _mm_sub_epi64(_mm_xor_si128(x_abs_product_odd, x_neg_mask_odd), x_ne…  in xnn_qs8_requantize_q31__sse2()
    108  …const __m128i y_product_odd = _mm_sub_epi64(_mm_xor_si128(y_abs_product_odd, y_neg_mask_odd), y_ne…  in xnn_qs8_requantize_q31__sse2()
    109  …const __m128i z_product_odd = _mm_sub_epi64(_mm_xor_si128(z_abs_product_odd, z_neg_mask_odd), z_ne…  in xnn_qs8_requantize_q31__sse2()
    110  …const __m128i w_product_odd = _mm_sub_epi64(_mm_xor_si128(w_abs_product_odd, w_neg_mask_odd), w_ne…  in xnn_qs8_requantize_q31__sse2()
|
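All four requantization variants above (qu8 and qs8, SSE2 and SSSE3) hit the same conditional-negation idiom: the Q31 multiply is carried out on absolute values because SSE2/SSSE3 have no signed 64-bit multiply, and the sign is restored per 64-bit lane as (abs ^ neg_mask) - neg_mask, where neg_mask is either all zeros or all ones. A minimal sketch of just that step (the helper name is illustrative, not an XNNPACK symbol):

    #include <emmintrin.h>  /* SSE2: _mm_xor_si128, _mm_sub_epi64 */

    /* Negate each 64-bit lane of abs_product where neg_mask is all ones,
     * leave it unchanged where neg_mask is all zeros. */
    static __m128i xor_negate_epi64_sketch(__m128i abs_product, __m128i neg_mask) {
      /* neg_mask ==  0: (x ^  0) -  0  == x             */
      /* neg_mask == -1: (x ^ -1) - -1  == ~x + 1 == -x  */
      return _mm_sub_epi64(_mm_xor_si128(abs_product, neg_mask), neg_mask);
    }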
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-xw-minmax-ssse3.c |
    141  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    142  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    143  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    157  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    158  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
    159  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
|
D | 4x4c2-xw-minmax-sse2.c |
    210  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    211  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    212  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    213  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    230  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    231  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    232  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
    233  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
|
D | 4x4c2-minmax-sse2-ld128.c |
    217  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    218  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    219  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    220  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    237  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    238  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    239  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
    240  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
|
D | 4x4c2-minmax-sse2-ld64.c |
    217  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    218  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    219  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    220  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    237  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    238  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    239  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
    240  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
|
D | 4x4c2-minmax-ssse3-ld64.c |
    217  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    218  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    219  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    220  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    237  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    238  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    239  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
    240  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64()
|
D | 4x4c2-xw-minmax-ssse3.c |
    210  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    211  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    212  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    213  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    230  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    231  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    232  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
    233  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3()
|
D | 4x4c2-minmax-ssse3-ld128.c |
    217  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    218  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    219  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    220  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    237  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    238  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    239  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
    240  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128()
|
D | 3x4c8-minmax-sse2-ld64.c |
    145  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
    146  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
    147  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
    161  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
    162  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
    163  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x4c8-minmax-sse2-ld128.c |
    143  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
    144  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
    145  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
    159  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
    160  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
    161  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-minmax-ssse3-ld128.c |
    143  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
    144  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
    145  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
    159  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
    160  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
    161  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
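The qs8-gemm hits above, and the qu8-gemm and igemm listings that follow, use the same idiom inside the microkernels' requantization stage. The hits come in x02/x13 pairs because _mm_mul_epu32 only multiplies the even 32-bit lanes, so the odd lanes need a second multiply on a shuffled copy. A sketch of that even/odd split for one row of four accumulators, assuming a non-negative Q31 multiplier already broadcast into the even lanes (names and helper are illustrative, not the generated XNNPACK code):

    #include <emmintrin.h>  /* SSE2 */

    /* Produce signed 64-bit products acc[i] * multiplier for lanes 0,2 and 1,3.
     * Assumes multiplier >= 0, so each product's sign is the accumulator's sign. */
    static void q31_products_sketch(__m128i acc, __m128i multiplier,
                                    __m128i *prod02, __m128i *prod13) {
      const __m128i zero  = _mm_setzero_si128();
      const __m128i nmask = _mm_cmpgt_epi32(zero, acc);  /* all ones where acc < 0 */
      /* |acc| via the same XOR/subtract trick, on 32-bit lanes. */
      const __m128i absacc = _mm_sub_epi32(_mm_xor_si128(acc, nmask), nmask);

      /* Unsigned 32x32->64 multiplies: even lanes directly, odd lanes shuffled in. */
      const __m128i absprod02 = _mm_mul_epu32(absacc, multiplier);
      const __m128i absprod13 = _mm_mul_epu32(
          _mm_shuffle_epi32(absacc, _MM_SHUFFLE(3, 3, 1, 1)), multiplier);

      /* Widen each 32-bit sign mask to cover its 64-bit product, then restore sign. */
      const __m128i nmask02 = _mm_shuffle_epi32(nmask, _MM_SHUFFLE(2, 2, 0, 0));
      const __m128i nmask13 = _mm_shuffle_epi32(nmask, _MM_SHUFFLE(3, 3, 1, 1));
      *prod02 = _mm_sub_epi64(_mm_xor_si128(absprod02, nmask02), nmask02);
      *prod13 = _mm_sub_epi64(_mm_xor_si128(absprod13, nmask13), nmask13);
    }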
/external/XNNPACK/src/qu8-gemm/ |
D | 4x4c2-minmax-sse2.c |
    222  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    223  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    224  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    225  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    242  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    243  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    244  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
    245  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x4c2-minmax-sse2-ld128.c |
    236  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    237  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    238  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    239  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    256  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    257  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    258  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
    259  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
|
D | 4x4c2-minmax-ssse3-ld64.c |
    236  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    237  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    238  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    239  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    256  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    257  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    258  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
    259  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
|
D | 4x4c2-minmax-ssse3-ld128.c |
    236  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    237  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    238  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    239  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    256  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    257  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    258  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
    259  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
|
D | 3x4c8-minmax-ssse3-ld64.c |
    162  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
    163  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
    164  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
    178  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
    179  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
    180  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
|
D | 3x4c8-minmax-sse2-ld128.c |
    160  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
    161  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
    162  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
    176  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
    177  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
    178  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-minmax-ssse3-ld128.c |
    160  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
    161  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
    162  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
    176  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
    177  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
    178  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
/external/XNNPACK/src/qu8-igemm/ |
D | 4x4c2-minmax-sse2.c |
    206  const __m128i vprod0x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    207  const __m128i vprod1x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x02, vnmask1x02), vnmask1x02);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    208  const __m128i vprod2x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x02, vnmask2x02), vnmask2x02);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    209  const __m128i vprod3x02 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x02, vnmask3x02), vnmask3x02);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    226  const __m128i vprod0x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod0x13, vnmask0x13), vnmask0x13);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    227  const __m128i vprod1x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod1x13, vnmask1x13), vnmask1x13);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    228  const __m128i vprod2x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod2x13, vnmask2x13), vnmask2x13);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
    229  const __m128i vprod3x13 = _mm_sub_epi64(_mm_xor_si128(vabsprod3x13, vnmask3x13), vnmask3x13);  in xnn_qu8_igemm_minmax_ukernel_4x4c2__sse2()
|