/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-neon-mull-padal.c | 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 190 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 268 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 355 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 236 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 379 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 457 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 544 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 224 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 316 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 403 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 232 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 325 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 424 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 288 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 471 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 564 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 663 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 275 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 389 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 488 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-neon-mull-padal.c | 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 175 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 250 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 337 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 221 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 364 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 439 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 526 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 209 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 298 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 385 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 215 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 305 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 404 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 271 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 454 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 544 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 643 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 258 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 369 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 468 const int32x2_t vpsum2x8 = vadd_s32(vget_low_s32(vacc2x8), vget_high_s32(vacc2x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|