/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-fp32-sse2-mul16.c | 158 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 160 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 161 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 303 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 305 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 306 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-fp32-sse41-mul16-add16.c | 145 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 149 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 274 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 277 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16-add16.c | 145 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 149 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 274 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 277 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
|
D | up8x9-minmax-fp32-xop-mul16-add16.c | 150 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 154 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 279 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 282 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
|
D | up16x9-minmax-fp32-sse2-mul16.c | 202 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 206 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 207 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 394 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 396 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 397 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16()
|
D | up8x9-minmax-fp32-sse41-mul16.c | 151 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 155 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 288 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 291 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 151 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 155 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 288 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 291 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16-add16.c | 149 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 151 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 282 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 284 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
|
D | up16x9-minmax-fp32-xop-mul16-add16.c | 181 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local 189 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 351 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local 355 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
|
D | up24x9-minmax-fp32-sse2-mul16.c | 246 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() local 252 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 253 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 480 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() local 482 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 483 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16()
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up8x9-minmax-fp32-sse41-mul16.c | 157 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 161 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 162 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 302 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 305 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 306 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 157 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 161 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 162 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 302 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 305 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 306 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16.c | 160 …const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_unpacklo_epi8(vk5x01234567, vzero), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 162 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 163 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 305 …const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_unpacklo_epi8(vk5x01234567, vzero), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 307 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 308 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|
D | up16x9-minmax-fp32-avx-mul16.c | 199 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16() local 207 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16() 208 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16() 391 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16() local 395 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16() 396 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16()
|
D | up16x9-minmax-fp32-sse41-mul16.c | 199 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16() local 207 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16() 208 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16() 391 const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_cvtepu8_epi16(vk5x01234567), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16() local 395 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16() 396 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-fp32-sse2-mul16.c | 204 …const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_unpacklo_epi8(vk5x01234567, vzero), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 208 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 209 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 395 …const __m128i vxk5x01234567 = _mm_sub_epi16(_mm_unpacklo_epi8(vk5x01234567, vzero), vk_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 397 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 398 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16()
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up8x9-minmax-fp32-sse2-mul16.c | 158 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 160 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 161 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 305 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 307 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 308 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-fp32-xop-mul16-add16.c | 150 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 154 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 281 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 284 vprod01234567 = _mm_macc_epi16(vxi5x01234567, vxk5x01234567, vprod01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16-add16.c | 145 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 149 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 276 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 279 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
|
D | up8x9-minmax-fp32-sse41-mul16-add16.c | 145 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 149 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 276 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 279 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
|
D | up16x9-minmax-fp32-sse2-mul16.c | 202 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 206 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 207 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 398 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() local 400 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16() 401 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16()
|
D | up8x9-minmax-fp32-sse41-mul16.c | 151 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 155 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 290 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 293 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16-add16.c | 149 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 151 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 284 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 286 vprod01234567 = _mm_add_epi16(vprod01234567, _mm_mullo_epi16(vxi5x01234567, vxk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 151 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 155 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 290 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 293 vprod01234567 = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up24x9-minmax-fp32-sse2-mul16.c | 246 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() local 252 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 253 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 486 … const __m128i vxk5x01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vk5x01234567, vk5x01234567), 8); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() local 488 const __m128i vprod5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16() 489 const __m128i vprod5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__sse2_mul16()
|