Home
last modified time | relevance | path

Searched refs:vsum2xCD (Results 1 – 25 of 44) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c8-minmax-rndnu-neon-mull.c252 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
266 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
349 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
351 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c300 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
313 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
396 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
398 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c307 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
329 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
416 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
418 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c441 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
455 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
538 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
540 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c371 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
392 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
479 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
481 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D3x16c4s2-minmax-rndnu-neon-mull.c254 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
256 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c546 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
568 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
655 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
657 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c331 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
333 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c328 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
330 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D4x16c4s2-minmax-rndnu-neon-mull.c306 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
308 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D3x16c4-minmax-rndnu-neon-mull-dup.c328 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
330 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
D4x16c4-minmax-rndnu-neon-mull-ld2r.c399 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
401 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
D4x16c4-minmax-rndnu-neon-mull-dup.c399 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
401 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
D4x16c4-minmax-rndnu-neon-mull-ld1r.c403 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
405 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c8-minmax-rndnu-neon-mull.c270 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
284 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
367 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
369 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c16-minmax-rndnu-neon-mlal.c318 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
331 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
414 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
416 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mull.c327 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
349 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
436 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
438 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c459 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
473 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
556 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
558 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c391 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
412 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
499 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
501 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D3x16c4s2-minmax-rndnu-neon-mull.c272 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
274 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c566 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
588 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
675 const int32x2_t vsum2xCD = vpadd_s32(vpsum2xC, vpsum2xD); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
677 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c4s2-minmax-rndnu-neon-mull.c326 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
328 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D3x16c4-minmax-rndnu-neon-mull-dup.c345 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
347 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c345 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
347 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c348 const int32x2_t vsum2xCD = vpadd_s32(vget_low_s32(vacc2xCD), vget_high_s32(vacc2xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
350 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()

12