/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c | 177 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 184 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 220 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 221 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 201 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 208 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 244 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 245 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 210 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 221 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 259 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 260 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 278 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 285 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 321 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 322 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 242 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 253 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 291 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 292 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 265 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 280 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 346 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 347 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 337 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 348 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 386 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 387 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 313 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 394 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 395 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 322 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 345 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 415 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 416 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 386 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 409 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 479 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 480 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 454 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 469 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 535 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 536 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 561 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 584 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 654 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 655 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c | 159 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local 166 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 202 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local 203 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 183 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 190 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 226 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 227 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 190 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 201 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 239 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 240 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 260 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 267 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 303 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 304 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 222 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 233 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 271 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 272 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 317 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 366 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 367 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 247 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 262 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 328 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 329 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 295 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 310 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 376 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 377 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 302 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 325 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 395 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 396 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 436 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 451 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 517 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 518 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 366 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 389 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 459 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 460 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 541 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 564 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 634 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 635 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|