/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-xw-minmax-avx2.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2())
    64: __m256i vacc1x23 = vacc0x23;   (local)
    85: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   101: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
2x8c8-minmax-avx2.c  (in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2())
    64: __m256i vacc1x23 = vacc0x23;   (local)
    87: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   105: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
3x8c8-minmax-avx2.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2())
    70: __m256i vacc1x23 = vacc0x23;   (local)
   101: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   122: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
3x8c8-xw-minmax-avx2.c  (in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2())
    70: __m256i vacc1x23 = vacc0x23;   (local)
    99: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   118: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
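Read together, these AVX2 8c8 entries trace a single accumulator: it is seeded from row 0's bias-initialized value, updated in the main loop with a 16-bit multiply-accumulate, and merged into vacc1x0213 by a horizontal add. The fragment below is a minimal sketch of that pattern only; the helper name and the way vxa1/vxb23 reach it are hypothetical, and the real kernels run the middle step inside their K loop.

#include <immintrin.h>

/* Minimal sketch of the vacc1x23 lifecycle indexed above.  vxa1 holds row-1
 * activations and vxb23 holds columns-2/3 weights, both already sign-extended
 * from 8 to 16 bits (an assumption about the surrounding loads). */
static __m256i sketch_row1_cols23(__m256i vacc0x23, __m256i vacc1x01,
                                  __m256i vxa1, __m256i vxb23) {
  /* Line 64/70: row 1 starts from the same bias-initialized accumulator as row 0. */
  __m256i vacc1x23 = vacc0x23;
  /* Line 85/87/99/101: pairwise int16 multiply-add into int32 lanes, then
   * fold into the running accumulator. */
  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
  /* Line 101/105/118/122: horizontal add merges the column-0/1 and column-2/3
   * accumulators; the lanes come out in 0,2,1,3 column order ("0213"). */
  return _mm256_hadd_epi32(vacc1x01, vacc1x23);
}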
2x4c8-xw-minmax-xop.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop())
    98: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   101: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-sse41-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64())
    97: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   100: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-xw-minmax-sse41.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41())
    93: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
    96: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-xop-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128())
   100: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   103: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-sse41-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128())
    95: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
    98: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-xop-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64())
   102: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   105: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-ssse3-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128())
    95: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
    98: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
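The 2x4c8 SSE4.1/SSSE3/XOP entries above all show the tail of the same reduction: per-column accumulators are pairwise-combined with _mm_hadd_epi32 until row 1 has one int32 sum per output column. A minimal sketch follows; the helper name is hypothetical, and the vacc1x01 step is inferred by symmetry rather than taken from this index.

#include <tmmintrin.h>  /* _mm_hadd_epi32 (SSSE3) */

/* Sketch of the two-level horizontal reduction in the 2x4c8 kernels listed
 * above.  vacc1x0..vacc1x3 each hold four partial int32 sums for one output
 * column of row 1. */
static __m128i sketch_reduce_row1(__m128i vacc1x0, __m128i vacc1x1,
                                  __m128i vacc1x2, __m128i vacc1x3) {
  /* Assumed by symmetry with the vacc1x23 line below. */
  const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1);
  /* Lines 93..102 above: pairwise-add the column-2 and column-3 partials. */
  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);
  /* Second hadd leaves one int32 dot-product sum per column: [c0 c1 c2 c3]. */
  return _mm_hadd_epi32(vacc1x01, vacc1x23);
}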
2x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd())
   124: const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);   (local)
   127: const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);
2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128())
   128: const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);   (local)
   131: const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);
2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64())
   124: const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);   (local)
   127: const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);
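In the wasmsimd entries, vacc1x23 is not an accumulator but a pair of sign-extended 64-bit values carved out of vacc1x0123 for the rounding requantization multiply. The sketch below illustrates that step under stated assumptions; the definitions of vsign1x0123, vmultiplier, and vrounding do not appear in this index and are reconstructed here for illustration only.

#include <wasm_simd128.h>

/* Sketch of the widening step in the wasmsimd entries above: two of the four
 * int32 sums in vacc1x0123 are sign-extended into int64 lanes, then put
 * through the rounding fixed-point multiply.  vmultiplier/vrounding are
 * assumed to hold the requantization multiplier and rounding constant in
 * both 64-bit lanes. */
static v128_t sketch_requant_cols23(v128_t vacc1x0123, v128_t vmultiplier,
                                    v128_t vrounding) {
  /* Assumed definition: all-ones in lanes whose int32 sum is negative. */
  const v128_t vsign1x0123 = wasm_i32x4_shr(vacc1x0123, 31);
  /* Lines 124/128: interleave value and sign lanes for columns 2 and 3,
   * yielding two sign-extended 64-bit integers. */
  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);
  /* Lines 127/131: 64-bit multiply by the multiplier, plus the rounding term. */
  return wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);
}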
2x4c8-minmax-ssse3-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64())
    97: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   100: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-xw-minmax-ssse3.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3())
    93: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
    96: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-avx2.c  (in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2())
    65: __m256i vacc1x23 = vacc0x23;   (local)
   100: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   120: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
3x8c8-minmax-avx2.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2())
    69: __m256i vacc1x23 = vacc0x23;   (local)
   116: vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
   139: const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-xop-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64())
   117: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   120: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-xop-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128())
   115: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   118: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-sse41-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64())
   112: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   115: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-sse41-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128())
   110: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   113: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128())
   143: const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);   (local)
   146: const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);
2x4c8-minmax-ssse3-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128())
   110: const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);   (local)
   113: __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64())
   139: const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);   (local)
   142: const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);