Home
last modified time | relevance | path

Searched refs:vscaled4567 (Results 1 – 25 of 135) sorted by relevance

123456

/external/XNNPACK/src/qc8-dwconv/gen/
Dup8x3-minmax-fp32-sse41-mul16.c97 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local
103 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
107 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
110 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
165 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local
170 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
174 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
177 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
Dup8x3-minmax-fp32-sse2-mul16.c100 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local
106 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
110 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
113 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
171 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local
176 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
180 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
183 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
Dup16x3-minmax-fp32-xop-mul16-add16.c121 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local
131 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
137 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
142 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
203 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local
208 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
212 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
215 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
Dup16x3-minmax-fp32-avx-mul16-add16.c116 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local
126 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
132 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
137 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
198 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local
203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
Dup8x9-minmax-fp32-xop-mul16-add16.c196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local
202 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
206 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
209 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
322 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local
327 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
331 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
334 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
Dup8x9-minmax-fp32-avx-mul16-add16.c191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local
197 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
201 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
204 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
317 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local
322 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
326 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
329 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
Dup8x9-minmax-fp32-sse41-mul16-add16.c191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local
197 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
201 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
204 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
317 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local
322 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
326 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
329 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
Dup8x9-minmax-fp32-sse41-mul16.c199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
205 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
209 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
212 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
333 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
338 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
342 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
345 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
Dup8x9-minmax-fp32-sse2-mul16-add16.c196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
202 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
206 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
209 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
327 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
332 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
336 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
339 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
Dup8x9-minmax-fp32-avx-mul16.c199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
205 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
209 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
212 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
333 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
338 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
342 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
345 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
Dup8x9-minmax-fp32-sse2-mul16.c208 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
214 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
218 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
221 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
351 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
356 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
360 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
363 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
Dup16x9-minmax-fp32-avx-mul16-add16.c248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local
258 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
264 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
269 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
396 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local
401 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
405 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
408 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup8x9-minmax-fp32-sse41-mul16-add16.c191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local
195 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
199 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
202 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
315 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16() local
319 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
323 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
326 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16()
Dup8x9-minmax-fp32-avx-mul16-add16.c191 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local
195 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
199 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
202 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
315 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16() local
319 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
323 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
326 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16()
Dup8x9-minmax-fp32-xop-mul16-add16.c196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local
200 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
204 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
207 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
320 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16() local
324 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
328 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
331 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16()
Dup8x9-minmax-fp32-sse41-mul16.c199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
331 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
335 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
339 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
342 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
Dup8x9-minmax-fp32-avx-mul16.c199 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
207 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
210 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
331 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
335 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
339 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
342 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
Dup8x9-minmax-fp32-sse2-mul16-add16.c196 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
200 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
204 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
207 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
325 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16() local
329 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
333 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
336 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16()
Dup8x9-minmax-fp32-sse2-mul16.c208 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
212 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
216 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
219 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
Dup16x9-minmax-fp32-xop-mul16-add16.c253 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local
259 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
265 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
270 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
397 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16() local
401 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
405 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
408 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16()
Dup16x9-minmax-fp32-sse41-mul16-add16.c248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() local
254 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
260 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
265 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
392 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16() local
396 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
400 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
403 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16()
Dup16x9-minmax-fp32-avx-mul16-add16.c248 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local
254 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
260 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
265 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
392 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16() local
396 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
400 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
403 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16()
/external/XNNPACK/src/qu8-dwconv/gen/
Dup8x9-minmax-fp32-sse41-mul16.c209 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
213 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
217 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
220 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16() local
353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16()
Dup8x9-minmax-fp32-avx-mul16.c209 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
213 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
217 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
220 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
349 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16() local
353 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
357 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
360 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16()
Dup8x9-minmax-fp32-sse2-mul16.c210 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
214 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
218 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
221 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
351 __m128 vscaled4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16() local
355 vscaled4567 = _mm_mul_ps(vscaled4567, vscale); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
359 vscaled4567 = _mm_min_ps(vscaled4567, voutput_max_less_zero_point); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()
362 vacc4567 = _mm_cvtps_epi32(vscaled4567); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16()

123456