Home
last modified time | relevance | path

Searched refs:vacc2x7 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-rndnu-neon-mull.c79 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
144 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
161 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
208 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
D3x8c8-minmax-rndnu-neon-mlal.c79 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
179 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
245 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
262 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
309 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D3x8c16-minmax-rndnu-neon-mlal.c79 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
168 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
185 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
231 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mull.c85 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
174 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
192 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
245 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D4x8c8-minmax-rndnu-neon-mlal.c85 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
218 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
301 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
319 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
372 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D4x8c16-minmax-rndnu-neon-mlal.c85 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
206 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
224 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
276 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D3x16c8-minmax-rndnu-neon-mlal.c95 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
211 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
357 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
438 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
523 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D3x16c8-minmax-rndnu-neon-mull.c95 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
168 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
249 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
334 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c95 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
200 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
297 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
381 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c101 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
206 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
304 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
401 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c101 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
258 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
445 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
543 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
640 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c101 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
246 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
368 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
464 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-rndnu-neon-mull.c78 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
159 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
179 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
226 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
D3x8c8-minmax-rndnu-neon-mlal.c78 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
194 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
260 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
280 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
327 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D3x8c16-minmax-rndnu-neon-mlal.c78 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
183 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
203 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
249 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mull.c82 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
191 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
212 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
265 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D4x8c8-minmax-rndnu-neon-mlal.c82 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
235 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
318 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
339 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
392 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D4x8c16-minmax-rndnu-neon-mlal.c82 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
223 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
244 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
296 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D3x16c8-minmax-rndnu-neon-mlal.c94 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
226 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
372 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
456 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
541 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D3x16c8-minmax-rndnu-neon-mull.c94 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
183 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
267 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
352 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c94 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
215 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
315 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
399 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c98 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
223 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
324 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
421 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c98 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
275 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
462 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
563 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
660 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c98 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
263 vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
388 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
484 const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()