/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 194 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 198 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 199 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 354 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 357 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 358 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 196 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 198 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 199 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 376 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 378 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 379 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 196 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 198 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 199 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 376 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 378 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 379 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 260 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 268 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 269 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 459 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 463 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 464 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 264 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 268 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 269 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 498 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 500 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 501 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 264 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 268 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 269 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 498 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 500 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 501 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 326 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 338 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 339 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 557 const __m128i vxk8x01234567 = _mm_cvtepi8_epi16(vk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 561 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 562 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 332 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 338 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 339 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 612 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 614 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 615 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 332 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 338 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 339 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 612 …const __m128i vxk8x01234567 = _mm_unpacklo_epi8(vk8x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 614 const __m128i vp8x01234567lo = _mm_mullo_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 615 const __m128i vp8x01234567hi = _mm_mulhi_epi16(vxi8x01234567, vxk8x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|