Home
last modified time | relevance | path

Searched refs:vsum2x67 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-neon-mull-padal.c179 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
185 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
227 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
228 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c203 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
209 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
251 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
252 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c212 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
222 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
266 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
267 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c280 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
286 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
328 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c244 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
254 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
298 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
299 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c267 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
281 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
353 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
354 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c339 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
349 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
393 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
394 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c315 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
401 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
402 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c324 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
346 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
422 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
423 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D4x16c16-minmax-neon-mlal-padal.c388 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
410 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
486 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
487 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c456 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
470 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
542 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
543 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c563 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
585 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
661 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
662 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-neon-mull-padal.c161 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
167 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
209 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
210 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c185 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
191 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
233 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
234 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c192 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
202 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
246 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
247 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c262 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
268 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
310 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
311 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c224 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
234 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
278 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
279 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c319 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
329 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
373 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
374 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c249 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
263 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
335 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
336 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c297 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
311 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
383 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
384 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c304 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
326 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
402 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
403 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c438 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
452 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
524 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
525 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c368 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
390 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
466 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
467 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c543 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
565 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
641 const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
642 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()