/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 172 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 333 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 336 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 337 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 172 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 354 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 357 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 358 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 172 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 354 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 357 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 358 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 226 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 231 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 232 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 437 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 440 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 441 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 226 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 231 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 232 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 474 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 477 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 478 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 226 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 231 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 232 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 474 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 477 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 478 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 280 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 287 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 288 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 535 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 538 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 539 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 280 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 287 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 288 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 588 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 591 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 592 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 280 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 287 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 288 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 588 const __m128i vp6x01234567lo = _mm_mullo_epi16(vxi6x01234567, vxk6x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 591 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 592 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|