/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c |
  178  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
  185  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
  226  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
  228  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
  202  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
  209  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
  250  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
  252  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c |
  211  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
  222  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
  265  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
  267  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
  279  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  286  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  327  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  329  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
  243  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
  254  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
  297  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
  299  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c |
  266  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
  281  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
  352  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
  354  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
  338  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  349  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  392  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  394  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
  314  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
  329  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
  400  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
  402  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c |
  323  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
  346  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
  421  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
  423  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c |
  387  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
  410  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
  485  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
  487  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
  455  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  470  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  541  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  543  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
  562  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  585  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  660  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  662  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c |
  160  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
  167  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
  208  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
  210  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
  184  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
  191  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
  232  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
  234  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c |
  191  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
  202  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
  245  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
  247  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
  261  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  268  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  309  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  311  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
  223  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
  234  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
  277  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
  279  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
  318  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  329  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  372  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  374  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c |
  248  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
  263  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
  334  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
  336  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
  296  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
  311  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
  382  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
  384  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c |
  303  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
  326  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
  401  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
  403  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
  437  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  452  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  523  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  525  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c |
  367  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
  390  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
  465  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
  467  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
  542  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  565  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  640  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  642  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
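Every match above is the same accumulator-reduction idiom in these padal (pairwise-add) kernels: the four per-column int32x4_t accumulators vacc2x4..vacc2x7 for output row 2 are collapsed into a single int32x4_t vacc2x4567 of column sums, using vpaddq_s32 on AArch64 and a vpadd_s32/vcombine_s32 sequence on 32-bit NEON. The sketch below reconstructs just that step from the listed lines; the standalone helper, its name, and the __aarch64__ guard are illustrative assumptions (in the generated files this code sits inline inside each ukernel, selected by the library's own architecture macro), not XNNPACK's actual structure.

#include <arm_neon.h>

// Illustrative helper (not part of XNNPACK): reduce the row-2, columns 4..7
// accumulators to one vector of per-column sums, mirroring the lines listed above.
static int32x4_t reduce_vacc2x4567(int32x4_t vacc2x4, int32x4_t vacc2x5,
                                   int32x4_t vacc2x6, int32x4_t vacc2x7) {
#if defined(__aarch64__)
  // A64: vpaddq_s32 pairwise-adds adjacent lanes of both inputs, so two rounds
  // yield { sum(vacc2x4), sum(vacc2x5), sum(vacc2x6), sum(vacc2x7) }.
  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
  return vpaddq_s32(vsum2x45, vsum2x67);
#else
  // 32-bit NEON has no vpaddq_s32: fold each accumulator's high half onto its
  // low half, then pairwise-add the 2-lane sums and recombine into 4 lanes.
  const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));
  const int32x2_t vpsum2x5 = vadd_s32(vget_low_s32(vacc2x5), vget_high_s32(vacc2x5));
  const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6));
  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
  const int32x2_t vsum2x45 = vpadd_s32(vpsum2x4, vpsum2x5);
  const int32x2_t vsum2x67 = vpadd_s32(vpsum2x6, vpsum2x7);
  return vcombine_s32(vsum2x45, vsum2x67);
#endif
}

The same two variants account for the pair of vsum2x45 definitions in every qs8-gemm and qs8-igemm file listed here; only the surrounding multiply-accumulate loop (mull vs. mlal, c8 vs. c16 unrolling, 3 vs. 4 rows, 8 vs. 16 columns) differs between the files.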