Searched refs:vsum1xEF (results 1–25 of 66, sorted by relevance)
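Every match below is the per-kernel local vsum1xEF in XNNPACK's generated QS8 GEMM/IGEMM NEON micro-kernels: the pairwise reduction that produces the row-1 partial sums for output columns 0xE and 0xF just before they are merged into vacc1xCDEF. The hits show the same idiom in two flavors, a vpaddq_s32 path where the full 128-bit pairwise add is available (AArch64) and a vpadd_s32/vcombine_s32 fallback. The sketch below illustrates that idiom only; it is not the XNNPACK source, and the helper name reduce_row1_cols_CDEF as well as the column-0xC/0xD inputs vacc1x12/vacc1x13 (with vpsum1xC/vpsum1xD) are assumed for illustration.

    #include <arm_neon.h>

    /* Sketch of the column-reduction idiom seen in the search hits above.
     * Assumed helper, not XNNPACK code: collapses four per-column int32x4_t
     * accumulators for row 1, columns 0xC..0xF, into one vector holding
     * {sumC, sumD, sumE, sumF}. */
    static inline int32x4_t reduce_row1_cols_CDEF(
        int32x4_t vacc1x12,  /* column 0xC accumulator (assumed name) */
        int32x4_t vacc1x13,  /* column 0xD accumulator (assumed name) */
        int32x4_t vacc1x14,  /* column 0xE accumulator */
        int32x4_t vacc1x15)  /* column 0xF accumulator */
    {
    #if defined(__aarch64__)
      /* AArch64: vpaddq_s32 pairwise-adds two full 128-bit vectors, so two
       * rounds of pairwise addition produce the four column sums. */
      const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);
      const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
      return vpaddq_s32(vsum1xCD, vsum1xEF);
    #else
      /* AArch32: only the 64-bit vpadd_s32 exists, so each accumulator is
       * first folded to two lanes, then paired per column and recombined. */
      const int32x2_t vpsum1xC = vadd_s32(vget_low_s32(vacc1x12), vget_high_s32(vacc1x12));
      const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));
      const int32x2_t vpsum1xE = vadd_s32(vget_low_s32(vacc1x14), vget_high_s32(vacc1x14));
      const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));
      const int32x2_t vsum1xCD = vpadd_s32(vpsum1xC, vpsum1xD);
      const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF);
      return vcombine_s32(vsum1xCD, vsum1xEF);
    #endif
    }

The second vpaddq_s32 (or the vcombine_s32 on the fallback path) corresponds to the vacc1xCDEF assignments visible in the results below.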

/external/XNNPACK/src/qs8-igemm/gen/
2x16c8-minmax-rndnu-neon-mull.c
    206  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
    215  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
    271  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
    272  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
2x16c16-minmax-rndnu-neon-mlal.c
    238  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
    246  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
    302  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
    303  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
2x16c8-minmax-rndnu-neon-mlal.c
    345  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
    354  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
    410  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
    411  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
3x16c8-minmax-rndnu-neon-mull.c
    263  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    280  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
    340  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    341  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
3x16c16-minmax-rndnu-neon-mlal.c
    311  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    327  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
    387  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    388  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
2x16c4s2-minmax-rndnu-neon-mull.c
    207  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
    208  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
4x16c8-minmax-rndnu-neon-mull.c
    320  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    345  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
    409  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    410  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
3x16c8-minmax-rndnu-neon-mlal.c
    452  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    469  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    529  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    530  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
2x16c4-minmax-rndnu-neon-mull-ld2r.c
    261  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
    262  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
4x16c16-minmax-rndnu-neon-mlal.c
    384  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    408  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
    472  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    473  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
2x16c4-minmax-rndnu-neon-mull-dup.c
    261  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
    262  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c
    263  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
    264  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
/external/XNNPACK/src/qs8-gemm/gen/
2x16c8-minmax-rndnu-neon-mull.c
    190  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
    199  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
    255  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
    256  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
2x16c16-minmax-rndnu-neon-mlal.c
    222  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
    230  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
    286  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
    287  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
3x16c8-minmax-rndnu-neon-mull.c
    245  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    262  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
    322  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    323  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
2x16c8-minmax-rndnu-neon-mlal.c
    329  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
    338  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
    394  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
    395  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
3x16c16-minmax-rndnu-neon-mlal.c
    293  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    309  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
    369  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    370  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
2x16c4s2-minmax-rndnu-neon-mull.c
    191  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
    192  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
4x16c8-minmax-rndnu-neon-mull.c
    300  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    325  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
    389  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    390  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
3x16c8-minmax-rndnu-neon-mlal.c
    434  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    451  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    511  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    512  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
2x16c4-minmax-rndnu-neon-mull-dup.c
    246  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
    247  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c
    248  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
    249  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
2x16c4-minmax-rndnu-neon-mull-ld2r.c
    246  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
    247  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
4x16c16-minmax-rndnu-neon-mlal.c
    364  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    388  int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
    452  const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    453  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
3x16c4s2-minmax-rndnu-neon-mull.c
    243  const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
    244  int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
