/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 165 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 166 vout0123x0123456789ABCDEF = _mm512_max_epi8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 169 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 170 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 171 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 172 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 189 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 191 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 193 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 195 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 167 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 168 vout0123x0123456789ABCDEF = _mm512_max_epi8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 171 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 172 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 173 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 174 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 191 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 193 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 195 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 197 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 183 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 184 vout0123x0123456789ABCDEF = _mm512_max_epu8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 187 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 188 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 189 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 190 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 204 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 206 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 208 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 210 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 182 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 183 vout0123x0123456789ABCDEF = _mm512_max_epi8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 186 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 187 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 188 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 189 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 203 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 205 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 207 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 209 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 166 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 167 vout0123x0123456789ABCDEF = _mm512_max_epu8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 170 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 171 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 172 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 173 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 190 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 192 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 194 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 196 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c | 184 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 185 vout0123x0123456789ABCDEF = _mm512_max_epi8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 188 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 189 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 190 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 191 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 205 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 207 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 209 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() 211 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
/external/XNNPACK/src/amalgam/ |
D | avx512skx.c | 1628 …__m512i vout0123x0123456789ABCDEF = _mm512_shuffle_epi8(vout0123x084Cx195Dx2A6Ex3B7F, _mm512_set_e… in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local 1629 vout0123x0123456789ABCDEF = _mm512_max_epi8(vout0123x0123456789ABCDEF, voutput_min); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1632 _mm_storeu_si128((__m128i*) c0, _mm512_castsi512_si128(vout0123x0123456789ABCDEF)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1633 _mm_storeu_si128((__m128i*) c1, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 1)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1634 _mm_storeu_si128((__m128i*) c2, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 2)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1635 _mm_storeu_si128((__m128i*) c3, _mm512_extracti32x4_epi32(vout0123x0123456789ABCDEF, 3)); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1652 _mm512_mask_storeu_epi8(c0, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1654 _mm512_mask_storeu_epi8(c1 - 16, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1656 _mm512_mask_storeu_epi8(c2 - 32, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() 1658 _mm512_mask_storeu_epi8(c3 - 48, vmask, vout0123x0123456789ABCDEF); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() [all …]
|