Home
last modified time | relevance | path

Searched refs:vsum2x23 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-neon-mull-padal.c177 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
184 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
220 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
221 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c201 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
208 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
244 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
245 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c210 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
221 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
259 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
260 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c278 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
285 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
321 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
322 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c242 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
253 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
291 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
292 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c265 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
280 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
346 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
347 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c337 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
348 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
386 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
387 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c313 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
394 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
395 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c322 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
345 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
415 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
416 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D4x16c16-minmax-neon-mlal-padal.c386 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
409 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
479 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
480 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c454 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
469 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
535 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
536 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c561 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
584 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
654 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
655 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-neon-mull-padal.c159 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
166 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
202 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
203 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c183 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
190 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
226 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
227 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c190 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
201 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
239 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
240 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c260 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
267 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
303 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
304 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c222 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
233 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
271 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
272 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c317 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
366 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
367 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c247 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
262 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
328 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
329 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c295 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
310 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
376 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
377 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c302 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
325 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
395 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
396 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c436 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
451 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
517 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
518 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c366 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
389 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
459 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
460 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c541 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
564 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
634 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
635 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()