/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-neon-mull-padal.c | 271 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 283 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 367 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 368 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 319 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 331 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 415 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 416 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 328 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 348 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 436 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 437 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 392 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 412 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 500 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 501 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 460 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 472 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 556 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 557 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 567 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 587 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 675 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 676 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-neon-mull-padal.c | 253 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 265 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 349 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 350 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 301 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 313 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 397 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 398 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 308 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 328 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 416 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 417 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 442 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 454 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 538 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 539 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 372 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 392 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 480 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 481 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 547 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 567 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 655 const int32x2_t vsum2xEF = vpadd_s32(vpsum2xE, vpsum2xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 656 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|