/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse2-mul16.c | 209 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 212 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 222 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 223 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 388 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 391 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 402 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 403 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 283 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 288 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 306 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 307 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 512 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 515 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 526 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 527 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 209 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 222 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 223 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 388 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 402 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 403 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 357 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 364 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 390 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 391 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 626 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 629 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 640 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 641 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 283 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 306 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 307 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 512 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 526 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 527 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 357 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 390 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 391 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 626 const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 640 const __m128i vnmask02 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 641 const __m128i vnmask13 = _mm_shuffle_epi32(vnmask0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|