/external/XNNPACK/src/qc8-gemm/gen/

D | 1x4c8-minmax-fp32-sse41-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128():
      63  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      64  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      65  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-avx-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128():
      63  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      64  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      65  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
      64  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      65  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      66  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
      68  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      69  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      70  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
      64  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      65  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      66  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
      69  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      70  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      71  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-sse2-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128():
      65  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      66  const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23);
      67  const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23);
      68  const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23);

/external/XNNPACK/src/qs8-gemm/gen/

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
      64  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      65  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      66  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
      64  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      65  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      66  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-sse41-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128():
      63  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      64  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      65  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
      68  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      69  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      70  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-avx-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128():
      63  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      64  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      65  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
      69  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      70  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      71  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-ssse3-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128():
      64  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      65  const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23);
      66  const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23);
      67  const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23);

D | 1x4c2s4-minmax-fp32-sse2-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128():
      65  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      66  const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23);
      67  const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23);
      68  const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23);

/external/XNNPACK/src/qs8-igemm/gen/

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
      79  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      80  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      81  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
      74  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
      74  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-sse41-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128():
      74  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
      79  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      80  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      81  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-avx-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128():
      74  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

/external/XNNPACK/src/qc8-igemm/gen/

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
      74  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
      79  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      80  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      81  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
      74  const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      75  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      76  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
      79  const __m128i vb23 = _mm_load_si128((const __m128i*) ((const int8_t*) w + 16));   (local)
      80  const __m128i vxb2 = _mm_cvtepi8_epi16(vb23);
      81  const __m128i vxb3 = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);
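
Every entry above widens the packed int8 weight vector vb23 into two int16 halves vxb2/vxb3 using one of two idioms: the SSE4.1/AVX/XOP kernels sign-extend the low eight bytes with _mm_cvtepi8_epi16 and recover the high eight bytes by self-interleaving with _mm_unpackhi_epi8 and arithmetically shifting right by 8, while the SSE2/SSSE3 kernels build a per-byte sign mask with _mm_cmpgt_epi8 and interleave it with the original bytes. The standalone sketch below is not part of XNNPACK; the test data and program structure are invented purely to illustrate that both idioms give the same sign-extended lanes (compile with SSE4.1 enabled, e.g. gcc -msse4.1).

    #include <smmintrin.h>   /* SSE4.1 intrinsics (also provides SSE2/SSSE3) */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Arbitrary signed 8-bit test data standing in for a 16-byte slice of the packed weights. */
      int8_t w[16];
      for (int i = 0; i < 16; i++) w[i] = (int8_t) (i * 15 - 112);

      const __m128i vb23 = _mm_loadu_si128((const __m128i*) w);

      /* SSE4.1/AVX/XOP idiom: sign-extend the low 8 bytes directly; for the high 8 bytes,
         duplicate each byte into a 16-bit lane and arithmetic-shift right by 8, which
         fills the upper byte with copies of the sign bit. */
      const __m128i vxb2_cvt = _mm_cvtepi8_epi16(vb23);
      const __m128i vxb3_cvt = _mm_srai_epi16(_mm_unpackhi_epi8(vb23, vb23), 8);

      /* SSE2/SSSE3 idiom: compute a sign mask (0xFF for negative bytes, 0x00 otherwise)
         and interleave it above the original bytes to form sign-extended 16-bit lanes. */
      const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23);
      const __m128i vxb2_unpk = _mm_unpacklo_epi8(vb23, vsb23);
      const __m128i vxb3_unpk = _mm_unpackhi_epi8(vb23, vsb23);

      /* Both idioms should agree with plain scalar widening of the source bytes. */
      int16_t lo_cvt[8], hi_cvt[8], lo_unpk[8], hi_unpk[8];
      _mm_storeu_si128((__m128i*) lo_cvt, vxb2_cvt);
      _mm_storeu_si128((__m128i*) hi_cvt, vxb3_cvt);
      _mm_storeu_si128((__m128i*) lo_unpk, vxb2_unpk);
      _mm_storeu_si128((__m128i*) hi_unpk, vxb3_unpk);
      for (int i = 0; i < 8; i++) {
        printf("lane %d: lo %d/%d (ref %d)  hi %d/%d (ref %d)\n",
               i, lo_cvt[i], lo_unpk[i], (int) w[i], hi_cvt[i], hi_unpk[i], (int) w[i + 8]);
      }
      return 0;
    }

The aligned _mm_load_si128 in the c8 kernels versus the unaligned _mm_loadu_si128 in the c2s4 kernels reflects how the packed weights are laid out for each tile shape; the sketch uses the unaligned load so it works on an ordinary stack array.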