/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 298 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 300 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 301 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 321 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 322 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 321 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 322 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 399 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 401 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 402 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 436 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 438 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 439 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 436 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 438 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 439 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 497 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 499 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 500 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 550 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 552 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 553 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 550 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 552 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 553 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|