Home
last modified time | relevance | path

Searched refs:vsum3x45 (Results 1 – 25 of 44) sorted by relevance

1 2

/external/XNNPACK/src/qs8-gemm/gen/
D4x8c8-minmax-rndnu-neon-mull.c195 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
205 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
260 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
262 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D4x8c16-minmax-rndnu-neon-mlal.c227 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
236 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
291 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
293 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mlal.c322 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
332 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
387 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
389 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D4x8c4s2-minmax-rndnu-neon-mull.c198 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
200 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c311 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
331 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
430 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
432 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c255 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
257 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c255 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
257 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c259 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
261 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c16-minmax-rndnu-neon-mlal.c375 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
394 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
493 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
495 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D4x8c4s2-minmax-rndnu-neon-mlal.c331 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
333 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c550 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
570 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
669 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
671 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c4s2-minmax-rndnu-neon-mull.c312 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
314 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c399 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
401 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
/external/XNNPACK/src/qs8-igemm/gen/
D4x8c8-minmax-rndnu-neon-mull.c215 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
225 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
280 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
282 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D4x8c16-minmax-rndnu-neon-mlal.c247 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
256 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
311 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
313 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mlal.c342 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
352 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
407 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
409 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D4x8c4s2-minmax-rndnu-neon-mull.c218 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
220 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c331 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
351 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
450 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
452 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D4x16c16-minmax-rndnu-neon-mlal.c395 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
414 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
513 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
515 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c274 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
276 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c274 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
276 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c278 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
280 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x8c4s2-minmax-rndnu-neon-mlal.c351 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
353 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c570 const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
590 int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
689 const int32x2_t vsum3x45 = vpadd_s32(vpsum3x4, vpsum3x5); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
691 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c4s2-minmax-rndnu-neon-mull.c332 const int32x2_t vsum3x45 = vpadd_s32(vget_low_s32(vacc3x45), vget_high_s32(vacc3x45)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
334 int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()

1 2