/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 108 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 274 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 276 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 277 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 108 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 295 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 297 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 298 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 108 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 295 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 297 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 298 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 122 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 373 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 375 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 376 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 122 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 410 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 412 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 413 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 122 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 410 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 412 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 413 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 136 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 471 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 473 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 474 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 136 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 524 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 526 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 527 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 136 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 524 const __m128i vp1x01234567hi = _mm_mulhi_epi16(vxi1x01234567, vxk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 526 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 527 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|