/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up8x3-minmax-fp32-sse41-mul16.c | 97 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local 103 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 107 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 110 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 165 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local 170 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 174 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 177 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
|
D | up8x3-minmax-fp32-sse2-mul16.c | 100 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local 106 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 110 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 113 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 171 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local 176 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 180 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 183 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
|
D | up16x3-minmax-fp32-xop-mul16-add16.c | 121 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local 131 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 137 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 142 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 203 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local 208 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 212 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 215 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
|
D | up16x3-minmax-fp32-avx-mul16-add16.c | 116 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local 126 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 132 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 137 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 198 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local 203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
|
D | up8x9-minmax-fp32-xop-mul16-add16.c | 196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 202 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 206 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 209 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 322 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 327 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 331 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 334 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16-add16.c | 191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 197 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 201 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 204 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 317 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 322 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 326 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 329 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
|
D | up8x9-minmax-fp32-sse41-mul16-add16.c | 191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 197 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 201 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 204 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 317 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 322 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 326 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 329 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
|
D | up8x9-minmax-fp32-sse41-mul16.c | 199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 205 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 209 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 212 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 333 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 338 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 342 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 345 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16-add16.c | 196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 202 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 206 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 209 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 327 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 332 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 336 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 339 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 205 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 209 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 212 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 333 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 338 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 342 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 345 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16.c | 208 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 214 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 218 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 221 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 351 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 356 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 360 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 363 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|
D | up16x9-minmax-fp32-avx-mul16-add16.c | 248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local 258 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 264 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 269 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 396 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local 401 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 405 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 408 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-fp32-sse41-mul16-add16.c | 191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 195 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 199 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 202 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 315 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local 319 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 323 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() 326 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
|
D | up8x9-minmax-fp32-avx-mul16-add16.c | 191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 195 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 199 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 202 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 315 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local 319 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 323 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() 326 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
|
D | up8x9-minmax-fp32-xop-mul16-add16.c | 196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 200 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 204 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 207 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 320 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local 324 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 328 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() 331 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
|
D | up8x9-minmax-fp32-sse41-mul16.c | 199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 331 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 335 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 339 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 342 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 331 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 335 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 339 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 342 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16-add16.c | 196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 200 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 204 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 207 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 325 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local 329 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 333 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() 336 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
|
D | up8x9-minmax-fp32-sse2-mul16.c | 208 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 212 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 216 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 219 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|
D | up16x9-minmax-fp32-xop-mul16-add16.c | 253 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local 259 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 265 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 270 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 397 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local 401 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 405 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() 408 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
|
D | up16x9-minmax-fp32-sse41-mul16-add16.c | 248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() local 254 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() 260 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() 265 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() 392 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() local 396 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() 400 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() 403 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
|
D | up16x9-minmax-fp32-avx-mul16-add16.c | 248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local 254 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 260 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 265 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 392 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local 396 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 400 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() 403 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up8x9-minmax-fp32-sse41-mul16.c | 209 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 213 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 217 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 220 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local 353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() 360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-fp32-avx-mul16.c | 209 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 213 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 217 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 220 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local 353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() 360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
|
D | up8x9-minmax-fp32-sse2-mul16.c | 210 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 214 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 218 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 221 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 351 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local 355 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 359 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() 362 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
|