/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c2s4-xw-minmax-fp32-xop.c | 116 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__xop() local 121 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__xop() 126 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__xop() 130 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__xop()
|
D | 3x4c2s4-minmax-fp32-avx-ld64.c | 115 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() local 120 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() 125 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() 129 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64()
|
D | 3x4c2s4-minmax-fp32-xop-ld64.c | 120 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() local 125 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 130 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 134 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64()
|
D | 3x4c2s4-minmax-fp32-sse41-ld64.c | 115 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() local 120 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() 125 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() 129 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64()
|
D | 3x4c2s4-xw-minmax-fp32-avx.c | 111 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__avx() local 116 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__avx() 121 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__avx() 125 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__avx()
|
D | 3x4c2s4-minmax-fp32-xop-ld128.c | 118 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() local 123 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 128 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 132 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128()
|
D | 3x4c2s4-xw-minmax-fp32-sse41.c | 111 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse41() local 116 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse41() 121 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse41() 125 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse41()
|
D | 3x4c2s4-minmax-fp32-sse41-ld128.c | 113 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() local 118 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() 123 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() 127 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128()
|
D | 3x4c2s4-minmax-fp32-avx-ld128.c | 113 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() local 118 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() 123 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() 127 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128()
|
D | 3x4c2s4-xw-minmax-fp32-sse2.c | 111 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse2() local 116 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse2() 121 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse2() 125 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2s4__sse2()
|
D | 3x4c8-xw-minmax-fp32-sse41.c | 121 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41() local 126 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41() 131 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41() 135 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c2s4-minmax-fp32-xop-ld64.c | 136 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() local 141 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 146 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 150 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64()
|
D | 3x4c2s4-minmax-fp32-xop-ld128.c | 134 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() local 139 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 144 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 148 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128()
|
D | 3x4c2s4-minmax-fp32-sse2-ld128.c | 131 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128() local 136 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128() 141 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128() 145 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 3x4c2s4-minmax-fp32-xop-ld128.c | 134 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() local 140 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 145 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 149 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128()
|
D | 3x4c2s4-minmax-fp32-xop-ld64.c | 136 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() local 142 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 147 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 151 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 3x4c2s4-minmax-fp32-sse41-ld64.c | 115 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() local 121 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() 126 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64() 130 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64()
|
D | 3x4c2s4-minmax-fp32-avx-ld128.c | 113 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() local 119 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() 124 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128() 128 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128()
|
D | 3x4c2s4-minmax-fp32-xop-ld128.c | 118 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() local 124 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 129 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 133 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128()
|
D | 3x4c2s4-minmax-fp32-xop-ld64.c | 120 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() local 126 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 131 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 135 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64()
|
D | 3x4c2s4-minmax-fp32-sse41-ld128.c | 113 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() local 119 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() 124 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128() 128 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128()
|
D | 3x4c2s4-minmax-fp32-avx-ld64.c | 115 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() local 121 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() 126 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64() 130 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64()
|
D | 3x4c8-minmax-fp32-sse41-ld128.c | 121 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128() local 127 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128() 132 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128() 136 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128()
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 3x4c2s4-minmax-fp32-xop-ld64.c | 121 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() local 126 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 131 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64() 135 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64()
|
D | 3x4c2s4-minmax-fp32-xop-ld128.c | 120 __m128 vscaled2x0123 = _mm_cvtepi32_ps(vacc2x0123); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() local 125 vscaled2x0123 = _mm_mul_ps(vscaled2x0123, vscale); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 130 vscaled2x0123 = _mm_min_ps(vscaled2x0123, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128() 134 vacc2x0123 = _mm_cvtps_epi32(vscaled2x0123); in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128()
|