
Searched refs:vacc2x13 (Results 1 – 22 of 22) sorted by relevance
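
All 22 hits are in generated QS8 GEMM and IGEMM microkernels. These kernels name their accumulators vacc{row}x{columns}, so vacc2x13 belongs to output row 2: in the 3x4c8 SSE2 and WAsm SIMD kernels it holds the interleaved partial sums for columns 1 and 3 during the final horizontal reduction, while in the 16c8 and 16c16 NEON kernels it is the per-column accumulator for column 13 (0xD). The sketches after each result group below illustrate these two recurring patterns; they are simplified reconstructions, not the generated code itself.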

/external/XNNPACK/src/qs8-gemm/gen/
3x4c8-xw-minmax-wasmsimd.c 134 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
138 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
3x4c8-minmax-wasmsimd-ld128.c 138 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
142 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c 134 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
138 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
3x4c8-minmax-sse2-ld64.c 116 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() local
120 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
3x4c8-minmax-sse2-ld128.c 114 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() local
118 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
3x4c8-xw-minmax-sse2.c 112 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() local
116 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
3x16c8-minmax-neon-mull-padal.c 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
210 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
252 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
345 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
271 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
399 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
441 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
534 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
3x16c16-minmax-neon-mlal-padal.c 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
254 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
300 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
393 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
260 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
307 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
412 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
4x16c8-minmax-neon-mlal-padal.c 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
336 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
499 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
546 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
651 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
318 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
371 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
476 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
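
The 3x4c8 SSE2 and WAsm SIMD hits above all show the same epilogue: row 2's per-column accumulators vacc2x0..vacc2x3 are folded pairwise, first into vacc2x02 and vacc2x13, then into a single vacc2x0123 holding the four column sums. Below is a minimal, self-contained sketch of that reduction for the SSE2 flavour, with made-up demo values in place of the bias-initialized accumulators the real kernels carry out of the K loop; the WAsm SIMD kernels perform the same fold with wasm_v32x4_shuffle lane patterns equivalent to the unpack intrinsics.

    /* Sketch of the 3x4c8 SSE2 reduction seen above: vacc2x0..vacc2x3 each
     * hold four partial sums for one output column of row 2; the unpack/add
     * ladder folds them into one vector vacc2x0123 with the final sums for
     * columns 0..3.  Demo values are made up. */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      /* Pretend each accumulator holds 4 partial sums for its column. */
      const __m128i vacc2x0 = _mm_setr_epi32(1, 2, 3, 4);     /* column 0 -> 10  */
      const __m128i vacc2x1 = _mm_setr_epi32(10, 20, 30, 40); /* column 1 -> 100 */
      const __m128i vacc2x2 = _mm_setr_epi32(5, 5, 5, 5);     /* column 2 -> 20  */
      const __m128i vacc2x3 = _mm_setr_epi32(7, 0, 0, 1);     /* column 3 -> 8   */

      /* First fold: lanes of vacc2x02/vacc2x13 now hold partial column sums
       * interleaved as 0,2,0,2 and 1,3,1,3. */
      const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2),
                                             _mm_unpackhi_epi32(vacc2x0, vacc2x2));
      const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3),
                                             _mm_unpackhi_epi32(vacc2x1, vacc2x3));

      /* Second fold: lanes now hold the full sums for columns 0,1,2,3. */
      const __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13),
                                               _mm_unpackhi_epi32(vacc2x02, vacc2x13));

      int out[4];
      _mm_storeu_si128((__m128i*) out, vacc2x0123);
      printf("row 2 sums: %d %d %d %d\n", out[0], out[1], out[2], out[3]);
      return 0;
    }
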
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-wasmsimd-ld128.c 155 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
159 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c 151 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
155 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
3x4c8-minmax-sse2-ld128.c 131 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() local
135 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
3x4c8-minmax-sse2-ld64.c 133 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() local
137 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
3x16c8-minmax-neon-mull-padal.c 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
225 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
270 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
363 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
286 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
414 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
459 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
552 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
3x16c16-minmax-neon-mlal-padal.c 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
269 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
318 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
411 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
277 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
327 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
432 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
4x16c8-minmax-neon-mlal-padal.c 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
353 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
516 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
566 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
671 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
335 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
391 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
496 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
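
The 16c8 and 16c16 NEON hits show the other recurring pattern: vacc2x13 is a dedicated int32x4_t accumulator for row 2, column 13. It starts from the bias-initialized row-0 accumulator (int32x4_t vacc2x13 = vacc0x13;), the inner loop widens int16 products into it with vpadalq_s16, and the epilogue reduces it to one sum, either by pairing columns C and D with vpaddq_s32 on AArch64 or by the vget_low_s32/vget_high_s32 + vadd_s32 fold otherwise. The sketch below is a simplified, zero-initialized stand-in for that accumulate-and-reduce sequence, using made-up inputs and the non-AArch64 reduction.

    /* Sketch of the NEON accumulate-and-reduce for row 2, column 13.
     * One K-block: int8 x int8 -> int16 products via vmull_s8, pairwise
     * widened into the int32 accumulator via vpadalq_s16, then folded to a
     * scalar the ARMv7 way.  Demo data is made up; build for a NEON target. */
    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      /* Stand-ins for one K-block of row 2 and of weight column 13. */
      const int8_t va2[8]  = {1, 2, 3, 4, 5, 6, 7, 8};
      const int8_t vb13[8] = {1, 1, 1, 1, 2, 2, 2, 2};

      int32x4_t vacc2x13 = vdupq_n_s32(0);  /* kernels start from the bias instead */

      /* Multiply and pairwise-accumulate into the int32 lanes. */
      const int16x8_t vprod2x13 = vmull_s8(vld1_s8(va2), vld1_s8(vb13));
      vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13);

      /* Horizontal reduction of the four int32 lanes. */
      const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13));
      const int32x2_t vsum2xD  = vpadd_s32(vpsum2xD, vpsum2xD);

      /* Expected: 1+2+3+4 + 2*(5+6+7+8) = 62 */
      printf("row 2, column 13 dot product: %d\n", vget_lane_s32(vsum2xD, 0));
      return 0;
    }

Building the sketch requires a NEON-capable ARM target (for 32-bit ARM, compile with -mfpu=neon).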