
Searched refs:vacc3xAB (Results 1 – 16 of 16) sorted by relevance
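In XNNPACK's microkernel naming scheme, vacc3xAB is the int32x4_t accumulator for output row 3, columns 0xA-0xB of the 16-column tile. Every hit below shows the same lifecycle: initialization from vacc0xAB, widening accumulation via vpadalq_s16, pairwise merging with vacc3x89 into vacc3x89AB, and a final horizontal reduction into vsum3xAB.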

/external/XNNPACK/src/qs8-gemm/gen/
4x16c4-minmax-rndnu-neon-mull-ld2r.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
173 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
242 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
341 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
363 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
409 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
4x16c4-minmax-rndnu-neon-mull-dup.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
173 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
242 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
341 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
363 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
409 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
4x16c4-minmax-rndnu-neon-mull-ld1r.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
177 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
246 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
345 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
367 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
413 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
4x16c4s2-minmax-rndnu-neon-mull.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
166 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
234 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
270 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
316 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
4x16c4-minmax-rndnu-neon-mlal-dup.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
210 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
323 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
429 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
498 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
597 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
619 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
665 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
4x16c4-minmax-rndnu-neon-mlal-ld1r.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
218 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
331 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
441 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
510 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
609 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
631 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
677 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
4x16c4-minmax-rndnu-neon-mlal-ld2r.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
210 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
323 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
429 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
498 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
597 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
619 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
665 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
4x16c4s2-minmax-rndnu-neon-mlal.c
91 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
200 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
312 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
412 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
480 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
515 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
561 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
/external/XNNPACK/src/qs8-igemm/gen/
4x16c4-minmax-rndnu-neon-mull-ld2r.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
190 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
259 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
358 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
382 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
428 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
4x16c4-minmax-rndnu-neon-mull-ld1r.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
194 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
263 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
362 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
386 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
432 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
4x16c4-minmax-rndnu-neon-mull-dup.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
190 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
259 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
358 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
382 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
428 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
4x16c4s2-minmax-rndnu-neon-mull.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
183 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
251 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
290 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
336 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
4x16c4-minmax-rndnu-neon-mlal-ld1r.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
235 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
348 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
458 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
527 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
626 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
650 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
696 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
4x16c4-minmax-rndnu-neon-mlal-dup.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
227 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
340 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
446 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
515 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
614 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
638 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
684 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
4x16c4-minmax-rndnu-neon-mlal-ld2r.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
227 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
340 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
446 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
515 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
614 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
638 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
684 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
4x16c4s2-minmax-rndnu-neon-mlal.c
88 int32x4_t vacc3xAB = vacc0xAB; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
217 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
329 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
429 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
497 vacc3xAB = vpadalq_s16(vacc3xAB, vprod3xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
535 int32x4_t vacc3x89AB = vpaddq_s32(vacc3x89, vacc3xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
581 const int32x2_t vsum3xAB = vpadd_s32(vget_low_s32(vacc3xAB), vget_high_s32(vacc3xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
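All 16 hits follow one widening-accumulate pattern: int8 products are produced as int16x8 vectors (the vprod3xAB* values), pairwise-added into int32x4 accumulators with vpadalq_s16, and finally reduced horizontally with vpaddq_s32/vpadd_s32. Below is a minimal, self-contained sketch of that pattern, not the XNNPACK kernels themselves; the inputs, names, and zero-initialized accumulators are illustrative (the real kernels start from packed bias values).

/* Minimal sketch of the vprod/vacc/vsum pattern above (assumed names,
 * not XNNPACK code): dot products of one int8 row against two int8
 * columns, K = 8. Compile for ARM with NEON enabled. */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int8_t a[8]  = {1, 2, 3, 4, 5, 6, 7, 8};   /* one row of A  */
  const int8_t b0[8] = {1, 1, 1, 1, 1, 1, 1, 1};   /* column 0 of B */
  const int8_t b1[8] = {2, 2, 2, 2, 2, 2, 2, 2};   /* column 1 of B */

  const int8x8_t va = vld1_s8(a);

  /* Widening multiply: eight int8*int8 products held as int16x8,
   * analogous to the vprod3xABc0/vprod3xABc1 values in the hits. */
  const int16x8_t vprod0 = vmull_s8(va, vld1_s8(b0));
  const int16x8_t vprod1 = vmull_s8(va, vld1_s8(b1));

  /* Pairwise add-and-accumulate into int32 lanes: the vpadalq_s16
   * step that appears at every hit above. */
  const int32x4_t vacc0 = vpadalq_s16(vdupq_n_s32(0), vprod0);
  const int32x4_t vacc1 = vpadalq_s16(vdupq_n_s32(0), vprod1);

  /* Horizontal reduction, as in the vsum3xAB lines: fold each int32x4
   * accumulator down to one per-column sum (vpadd_s32 also works on
   * 32-bit ARM, unlike the AArch64-only vpaddq_s32). */
  const int32x2_t vsum0 = vpadd_s32(vget_low_s32(vacc0), vget_high_s32(vacc0));
  const int32x2_t vsum1 = vpadd_s32(vget_low_s32(vacc1), vget_high_s32(vacc1));
  const int32_t dot0 = vget_lane_s32(vpadd_s32(vsum0, vsum0), 0);
  const int32_t dot1 = vget_lane_s32(vpadd_s32(vsum1, vsum1), 0);

  printf("dot0 = %d, dot1 = %d\n", dot0, dot1);    /* expect 36 and 72 */
  return 0;
}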