/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 203 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 213 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 262 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 264 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 235 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 245 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 294 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 296 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 260 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 278 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 331 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 333 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 342 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 352 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 401 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 403 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 308 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 326 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 379 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 381 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 317 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 343 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 400 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 402 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 381 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 407 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 464 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 466 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 449 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 467 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 520 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 522 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 556 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 582 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 639 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 641 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 187 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 197 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 246 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 248 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 219 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 229 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 278 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 280 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 242 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 260 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 313 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 315 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 326 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 336 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 385 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 387 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 290 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 308 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 361 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 363 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 297 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 323 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 380 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 382 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 431 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 449 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 502 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 504 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 361 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 387 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 444 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 446 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 536 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 562 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 619 const int32x2_t vsum1x89 = vpadd_s32(vpsum1x8, vpsum1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 621 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|