/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c | 176 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 184 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 219 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 221 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 200 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 208 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 243 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 245 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 209 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 221 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 258 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 260 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 277 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 285 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 320 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 322 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 241 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 253 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 290 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 292 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 264 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 280 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 345 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 347 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 336 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 348 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 385 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 387 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 312 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 393 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 395 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 321 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 345 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 414 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 416 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 385 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 409 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 478 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 480 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 453 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 469 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 534 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 536 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 560 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 584 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 653 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 655 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mull-padal.c | 158 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local 166 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 201 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local 203 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 182 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 190 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 225 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 227 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 189 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 201 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 238 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 240 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 259 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 267 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 302 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 304 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 221 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 233 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 270 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 272 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 316 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 365 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 367 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 246 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 262 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 327 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 329 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 294 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 310 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 375 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 377 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 301 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 325 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 394 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 396 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 435 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 451 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 516 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 518 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 365 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 389 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 458 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 460 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 540 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 564 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 633 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 635 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|