/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-rndnu-neon-mull.c | 206 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 215 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 271 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 272 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 238 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 246 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 302 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 303 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 345 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 354 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 410 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 411 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 263 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 280 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 340 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 341 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 311 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 327 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 387 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 388 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 2x16c4s2-minmax-rndnu-neon-mull.c | 207 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local 208 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 320 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 345 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 409 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 410 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 452 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 469 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 529 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 530 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld2r.c | 261 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local 262 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 384 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 408 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 472 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 473 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mull-dup.c | 261 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local 262 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld1r.c | 263 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local 264 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-rndnu-neon-mull.c | 190 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 199 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 255 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 256 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 222 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 230 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 286 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 287 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 245 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 262 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 322 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 323 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 329 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 338 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 394 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 395 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 293 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 309 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 369 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 370 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 2x16c4s2-minmax-rndnu-neon-mull.c | 191 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local 192 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 300 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 325 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 389 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 390 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 434 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 451 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 511 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 512 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mull-dup.c | 246 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local 247 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld1r.c | 248 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local 249 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld2r.c | 246 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local 247 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 364 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 388 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 452 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 453 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 3x16c4s2-minmax-rndnu-neon-mull.c | 243 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local 244 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
|