Searched refs:vacc1x23 (Results 1 – 25 of 46) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-xw-minmax-avx2.c
   64  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() local
   85  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
  101  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()

2x8c8-minmax-avx2.c
   64  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2() local
   87  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
  105  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()

3x8c8-minmax-avx2.c
   70  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2() local
  101  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
  122  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()

3x8c8-xw-minmax-avx2.c
   70  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2() local
   99  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
  118  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()

2x4c8-xw-minmax-xop.c
   98  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
  101  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()

2x4c8-minmax-sse41-ld64.c
   97  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
  100  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()

2x4c8-xw-minmax-sse41.c
   93  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() local
   96  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()

2x4c8-minmax-xop-ld128.c
  100  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() local
  103  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()

2x4c8-minmax-sse41-ld128.c
   95  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() local
   98  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()

2x4c8-minmax-xop-ld64.c
  102  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64() local
  105  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64()

2x4c8-minmax-ssse3-ld128.c
   95  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128() local
   98  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128()

2x4c8-xw-minmax-wasmsimd.c
  124  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
  127  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()

2x4c8-minmax-wasmsimd-ld128.c
  128  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
  131  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()

2x4c8-minmax-wasmsimd-ld64.c
  124  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
  127  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()

2x4c8-minmax-ssse3-ld64.c
   97  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64() local
  100  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64()

2x4c8-xw-minmax-ssse3.c
   93  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3() local
   96  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3()
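Reading across the qs8-gemm results: in the 8c8 AVX2 kernels, vacc1x23 is the running int32 accumulator for output row 1, columns 2 and 3. It starts from row 0's bias-loaded accumulator (vacc0x23), is updated every K step with _mm256_madd_epi16 products, and is finally folded together with vacc1x01 by _mm256_hadd_epi32. In the 4c8 SSE4.1/SSSE3/XOP kernels the same name is instead a temporary in the final horizontal reduction, where four per-column accumulators collapse into one vector of column sums via two hadd steps. The toy program below illustrates only that reduction idiom; it is not XNNPACK code, the accumulator values are invented, and in the real kernels vacc1x0..vacc1x3 would hold _mm_madd_epi16 partial products accumulated over the K loop. Compile with -mssse3.

/* Minimal sketch of the two-step _mm_hadd_epi32 reduction seen above. */
#include <stdio.h>
#include <immintrin.h>  /* _mm_hadd_epi32 requires SSSE3 */

int main(void) {
  /* Pretend these are row 1's per-column partial sums after the K loop;
     the full result for column i is the horizontal sum of vacc1xi. */
  const __m128i vacc1x0 = _mm_set_epi32(1, 2, 3, 4);      /* sums to 10  */
  const __m128i vacc1x1 = _mm_set_epi32(10, 20, 30, 40);  /* sums to 100 */
  const __m128i vacc1x2 = _mm_set_epi32(5, 5, 5, 5);      /* sums to 20  */
  const __m128i vacc1x3 = _mm_set_epi32(7, 0, 0, 0);      /* sums to 7   */

  /* First hadd: adjacent pairs within each accumulator. */
  const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1);
  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);
  /* Second hadd: lane i of vacc1x0123 is now the full sum of vacc1xi,
     i.e. the int32 value for row 1, column i, before requantization. */
  const __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);

  int out[4];
  _mm_storeu_si128((__m128i*) out, vacc1x0123);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 10 100 20 7 */
  return 0;
}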
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-avx2.c
   65  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2() local
  100  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
  120  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()

3x8c8-minmax-avx2.c
   69  __m256i vacc1x23 = vacc0x23;  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2() local
  116  vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
  139  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()

2x4c8-minmax-xop-ld64.c
  117  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
  120  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()

2x4c8-minmax-xop-ld128.c
  115  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() local
  118  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()

2x4c8-minmax-sse41-ld64.c
  112  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64() local
  115  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64()

2x4c8-minmax-sse41-ld128.c
  110  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128() local
  113  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128()

2x4c8-minmax-wasmsimd-ld128.c
  143  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
  146  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()

2x4c8-minmax-ssse3-ld128.c
  110  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128() local
  113  __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128()

2x4c8-minmax-wasmsimd-ld64.c
  139  const v128_t vacc1x23 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
  142  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
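In the wasmsimd kernels (both the gemm and igemm results), vacc1x23 names something different again: the row-1 accumulators for columns 2 and 3 after being interleaved with their sign words, so each 64-bit lane holds a sign-extended int32 ready for the 64-bit fixed-point multiply of the requantization step. Below is a standalone sketch of that widening trick, not the XNNPACK kernel itself: the multiplier and rounding constants are placeholders, and it uses the current wasm_i32x4_shuffle spelling of the shuffle intrinsic that the snippets above call wasm_v32x4_shuffle. Build with a WebAssembly SIMD toolchain, e.g. Emscripten with -msimd128.

/* Sketch of sign-extending two int32 lanes to int64 and multiplying with rounding. */
#include <stdint.h>
#include <stdio.h>
#include <wasm_simd128.h>

int main(void) {
  /* Four 32-bit accumulators for output row 1, one per output column. */
  const v128_t vacc1x0123 = wasm_i32x4_make(100, -200, 300, -400);

  /* Per-lane sign word: 0 for non-negative lanes, -1 (all ones) for negative lanes. */
  const v128_t vsign1x0123 = wasm_i32x4_shr(vacc1x0123, 31);

  /* Interleave value and sign words so lanes 2 and 3 become sign-extended
     64-bit integers (lanes are little-endian, so the value word comes first).
     Columns 0 and 1 would use indices 0, 4, 1, 5. */
  const v128_t vacc1x23 = wasm_i32x4_shuffle(vacc1x0123, vsign1x0123, 2, 6, 3, 7);

  /* Fixed-point multiply with rounding, as in the requantization step above.
     These constants are made up for the sketch, not XNNPACK's parameters. */
  const v128_t vmultiplier = wasm_i64x2_splat(INT64_C(0x40000000));  /* 0.5 in Q31 */
  const v128_t vrounding = wasm_i64x2_splat(INT64_C(0x40000000));    /* 2^30 */
  const v128_t vprod1x23 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x23, vmultiplier), vrounding);

  printf("%lld %lld\n",
         (long long) wasm_i64x2_extract_lane(vprod1x23, 0),
         (long long) wasm_i64x2_extract_lane(vprod1x23, 1));
  return 0;
}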
