/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 219 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 222 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 379 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 382 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 235 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 240 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 415 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 420 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 235 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 240 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 415 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 420 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 299 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 306 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 487 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 490 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 327 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 336 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 539 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 544 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 327 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 336 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 539 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 544 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 379 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 390 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 585 const __m128i vq31prod57 = _mm_add_epi64(vprod57, vprod57); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 588 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 419 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 432 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 653 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 658 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 419 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 432 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 653 const __m128i vq31prod57 = _mm_srli_epi64(_mm_add_epi64(vprod57, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 658 _mm_castsi128_ps(vq31prod46), _mm_castsi128_ps(vq31prod57), _MM_SHUFFLE(2, 0, 2, 0))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|