Home
last modified time | relevance | path

Searched refs:vsum2x45 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-neon-mull-padal.c178 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
185 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
226 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
228 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c202 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
209 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
250 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
252 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c211 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
222 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
265 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
267 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c279 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
286 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
327 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c243 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
254 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
297 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
299 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c266 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
281 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
352 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
354 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c338 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
349 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
392 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
394 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c314 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
400 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
402 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c323 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
346 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
421 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
423 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D4x16c16-minmax-neon-mlal-padal.c387 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
410 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
485 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
487 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c455 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
470 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
541 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
543 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c562 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
585 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
660 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
662 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-neon-mull-padal.c160 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
167 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
208 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
210 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c184 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
191 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
232 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
234 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c191 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
202 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
245 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
247 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c261 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
268 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
309 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
311 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c223 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
234 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
277 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
279 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c318 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
372 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
374 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c248 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
263 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
334 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
336 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c296 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
311 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
382 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
384 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c303 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
326 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
401 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
403 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c437 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
452 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
523 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
525 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c367 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
390 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
465 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
467 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c542 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
565 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
640 const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
642 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()