Home
last modified time | relevance | path

Searched refs: vsum2xAB (Results 1 – 25 of 44) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c8-minmax-rndnu-neon-mull.c251 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
265 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
343 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
344 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c299 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
312 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
390 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
391 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c306 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
328 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
410 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
411 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c440 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
454 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
532 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
533 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c370 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
391 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
473 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
474 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D3x16c4s2-minmax-rndnu-neon-mull.c252 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
253 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c545 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
567 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
649 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
650 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D3x16c4-minmax-rndnu-neon-mull-dup.c326 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
327 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c326 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
327 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c329 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
330 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
D4x16c4s2-minmax-rndnu-neon-mull.c304 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
305 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D4x16c4-minmax-rndnu-neon-mull-ld2r.c397 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
398 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
D3x16c4s2-minmax-rndnu-neon-mlal.c445 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
446 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mull-dup.c397 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
398 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c8-minmax-rndnu-neon-mull.c269 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
283 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
361 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
362 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c317 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
330 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
408 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
409 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c326 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
348 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
430 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
431 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c458 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
472 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
550 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
551 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c390 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
411 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
493 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
494 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D3x16c4s2-minmax-rndnu-neon-mull.c270 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
271 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c565 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
587 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
669 const int32x2_t vsum2xAB = vpadd_s32(vpsum2xA, vpsum2xB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
670 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D3x16c4-minmax-rndnu-neon-mull-dup.c343 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
344 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
D4x16c4s2-minmax-rndnu-neon-mull.c324 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
325 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c343 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
344 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c346 const int32x2_t vsum2xAB = vpadd_s32(vget_low_s32(vacc2xAB), vget_high_s32(vacc2xAB)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
347 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()

12