/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 90 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 94 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 95 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 258 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 261 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 262 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 92 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 94 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 95 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 280 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 282 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 283 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 92 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 94 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 95 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 280 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 282 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 283 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 92 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 100 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 101 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 355 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 359 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 360 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 100 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 101 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 394 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 396 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 397 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 100 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 101 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 394 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 396 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 397 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 94 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 106 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 107 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 453 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 457 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 458 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 100 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 106 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 107 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 508 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 510 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 511 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 100 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 106 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 107 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 508 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 510 const __m128i vp0x01234567lo = _mm_mullo_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 511 const __m128i vp0x01234567hi = _mm_mulhi_epi16(vxi0x01234567, vxk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|