/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x4c8-minmax-xop-ld128.c | 68 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128() local 69 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128() 70 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128() 71 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
|
D | 1x4c8-minmax-sse41-ld128.c | 63 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
|
D | 1x4c2-minmax-xop-ld128.c | 68 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128() local 69 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128() 70 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128() 71 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128()
|
D | 1x4c8-minmax-ssse3-ld128.c | 63 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128()
|
D | 1x4c2-minmax-sse41-ld128.c | 63 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128()
|
D | 1x4c8-minmax-sse2-ld128.c | 63 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128()
|
D | 1x4c2-minmax-ssse3-ld128.c | 63 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128()
|
D | 1x4c2-minmax-sse2-ld128.c | 63 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128() local 64 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128() 65 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128() 66 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128()
|
D | 2x4c8-minmax-xop-ld128.c | 83 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() local 84 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 85 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 86 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
|
D | 2x4c8-minmax-sse41-ld128.c | 78 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() local 79 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 80 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 81 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
|
D | 1x4c8-minmax-wasmsimd-ld128.c | 68 const v128_t vb23 = wasm_v128_load((const void*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local 69 const v128_t vxb2 = wasm_i16x8_widen_low_i8x16(vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() 70 const v128_t vxb3 = wasm_i16x8_widen_high_i8x16(vb23); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-ssse3-ld128.c | 78 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128() local 79 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128() 80 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128() 81 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128()
|
D | 3x4c8-minmax-xop-ld128.c | 98 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() local 99 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 100 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 101 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x4c8-minmax-xop-ld128.c | 79 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128() local 80 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128() 81 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128() 82 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
|
D | 1x4c8-minmax-sse41-ld128.c | 74 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128()
|
D | 1x4c8-minmax-ssse3-ld128.c | 74 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128()
|
D | 1x4c8-minmax-sse2-ld128.c | 74 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128()
|
D | 1x4c2-minmax-xop-ld128.c | 79 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128() local 80 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128() 81 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128() 82 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128()
|
D | 1x4c2-minmax-sse41-ld128.c | 74 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128()
|
D | 1x4c2-minmax-ssse3-ld128.c | 74 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128()
|
D | 1x4c2-minmax-sse2-ld128.c | 74 const __m128i vb23 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128() local 75 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128() 76 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128() 77 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128()
|
D | 2x4c8-minmax-xop-ld128.c | 96 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() local 97 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 98 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 99 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
|
D | 2x4c8-minmax-sse41-ld128.c | 91 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128() local 92 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128() 93 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128() 94 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128()
|
D | 2x4c8-minmax-ssse3-ld128.c | 91 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128() local 92 const __m128i vsb23 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128() 93 const __m128i vxb2 = _mm_unpacklo_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128() 94 const __m128i vxb3 = _mm_unpackhi_epi8(vb23, vsb23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128()
|
D | 1x4c8-minmax-wasmsimd-ld128.c | 79 const v128_t vb23 = wasm_v128_load((const void*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local 80 const v128_t vxb2 = wasm_i16x8_widen_low_i8x16(vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() 81 const v128_t vxb3 = wasm_i16x8_widen_high_i8x16(vb23); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|