/external/XNNPACK/src/qs8-gemm/gen/
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
      329  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      334  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3x1);
      338  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      500  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      504  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
      340  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      345  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      349  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      520  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      524  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
      348  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      353  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      357  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      532  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      536  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
      340  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      345  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      349  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      520  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      524  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
      364  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      369  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      373  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      556  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      560  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2s4-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
      238  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      242  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
      251  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      255  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
      259  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      263  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
      247  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      251  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
      247  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      251  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

/external/XNNPACK/src/qs8-igemm/gen/
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
      346  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      351  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3x1);
      355  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      517  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      521  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
      366  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      371  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      375  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      550  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      554  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
      382  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      387  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      391  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      574  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      578  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
      358  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      363  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      367  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      538  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      542  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
      358  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0);  local
      363  vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, va3c3x1);
      367  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);
      538  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      542  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2s4-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
      255  int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0);  local
      259  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
      264  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      268  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
      264  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      268  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
      268  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      272  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
      276  const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, va3c3);  local
      280  vacc3xCDEF = vpadalq_s16(vacc3xCDEF, vprod3xCDEFc3);

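Every hit above is an instance of the same NEON accumulation idiom: vmull_s8 produces eight int16 products, the *mlal* kernels fold in a second set of products with vmlal_s8, and vpadalq_s16 pairwise-adds the int16 lanes into the int32 accumulators (vacc3xCDEF). The sketch below is illustrative only and is not copied from the generated kernels; the helper name and argument names are hypothetical stand-ins for vprod3xCDEFc3 and friends.

  // Illustrative sketch of the shared accumulation pattern (not XNNPACK code).
  #include <arm_neon.h>

  // Hypothetical helper: accumulate one block of int8 products into a
  // 4-lane int32 accumulator, the way the *mlal* kernels do per c-step.
  static inline int32x4_t accumulate_c2_block(
      int32x4_t vacc,   // running 32-bit accumulators (e.g. vacc3xCDEF)
      int8x8_t va_x0,   // first activation vector (e.g. va3x0 / va3c3x0)
      int8x8_t va_x1,   // second activation vector (e.g. va3x1 / va3c3x1)
      int8x8_t vb_x0,   // first weight vector (e.g. vbCDEFc3x0)
      int8x8_t vb_x1)   // second weight vector (e.g. vbCDEFc3x1)
  {
    // Widening multiply: int8 x int8 -> eight int16 products.
    int16x8_t vprod = vmull_s8(vb_x0, va_x0);
    // Widening multiply-accumulate of the second pair of vectors.
    vprod = vmlal_s8(vprod, vb_x1, va_x1);
    // Pairwise add adjacent int16 lanes into the int32 accumulators.
    return vpadalq_s16(vacc, vprod);
  }

The *mull* kernels, and the later occurrences in the *mlal* files (e.g. lines 500/504 above), skip the vmlal_s8 step and feed a single vmull_s8 result straight into vpadalq_s16.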