/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal():
    333  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    338  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3x1);
    342  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    501  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    505  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r():
    352  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    357  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    361  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    531  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    535  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup():
    344  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    349  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    353  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    519  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    523  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r():
    344  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    349  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    353  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    519  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    523  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4s2-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull():
    255  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    259  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r():
    263  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    267  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r():
    267  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    271  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup():
    263  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    267  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

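All of the references above follow the same per-channel-group (c4) accumulation idiom used by these QS8 microkernels: an int8x8 widening multiply of the packed B bytes for columns C:D against 4 bytes of activation row 3, optionally fused with a second multiply while still in int16 (the *-mlal variants), then pairwise-widened into the int32 accumulator vacc3xCD with vpadalq_s16. The sketch below is a minimal illustration of that idiom only, not the actual microkernel: it assumes a NEON target (arm_neon.h), and the input values, arrays, and main() harness are made up for the example.

/* Minimal sketch of the two-multiply ("mlal") accumulation step.
 * Assumptions: NEON target; data and packing below are hypothetical examples,
 * reusing the variable names from the listing for readability. */
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* 4 bytes of activation row 3 for k-groups c1x0 and c1x1, each broadcast to
   * both 4-byte halves, and the matching packed B bytes: 4 for column C
   * followed by 4 for column D. */
  const int8_t a3c1x0[8]  = {1, 2, 3, 4, 1, 2, 3, 4};
  const int8_t a3c1x1[8]  = {5, 6, 7, 8, 5, 6, 7, 8};
  const int8_t bCDc1x0[8] = {10, 11, 12, 13, 20, 21, 22, 23};
  const int8_t bCDc1x1[8] = {30, 31, 32, 33, 40, 41, 42, 43};

  const int8x8_t va3c1x0  = vld1_s8(a3c1x0);
  const int8x8_t va3c1x1  = vld1_s8(a3c1x1);
  const int8x8_t vbCDc1x0 = vld1_s8(bCDc1x0);
  const int8x8_t vbCDc1x1 = vld1_s8(bCDc1x1);

  int32x4_t vacc3xCD = vdupq_n_s32(0);

  /* Two int8*int8 products are summed in int16 before widening; since each
   * product is at most 128*128, the sum of two stays within int16 range. */
  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);
  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);

  /* Pairwise-add adjacent int16 lanes and accumulate into int32: lanes 0..1
   * gather column C, lanes 2..3 gather column D (folded together later). */
  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

  printf("vacc3xCD = { %d, %d, %d, %d }\n",
         (int) vgetq_lane_s32(vacc3xCD, 0), (int) vgetq_lane_s32(vacc3xCD, 1),
         (int) vgetq_lane_s32(vacc3xCD, 2), (int) vgetq_lane_s32(vacc3xCD, 3));
  return 0;
}
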
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal():
    316  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    321  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3x1);
    325  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    484  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    488  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup():
    327  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    332  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    336  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    502  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    506  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r():
    335  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    340  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    344  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    514  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    518  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r():
    327  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3c1x0);  (local)
    332  vprod3xCDc1 = vmlal_s8(vprod3xCDc1, vbCDc1x1, va3c1x1);
    336  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);
    502  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    506  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4s2-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull():
    238  int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1x0, va3x0);  (local)
    242  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r():
    246  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    250  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup():
    246  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    250  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r():
    250  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);  (local)
    254  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

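The *-mull kernels, and the remainder blocks of the *-mlal kernels (the second cluster of line numbers in each entry above, e.g. 484/488 and 501/505), use the single-multiply form of the same idiom: one vmull_s8 feeds vpadalq_s16 directly. The sketch below, under the same assumptions and with made-up data, shows that path and cross-checks the lane layout of the example packing (4 bytes of column C followed by 4 bytes of column D, matching the vbCD naming above) against a plain scalar dot product.

/* Minimal sketch of the single-multiply ("mull" / remainder) accumulation step,
 * verified against a scalar reference. Data and packing are hypothetical. */
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  const int8_t a3c1[8]  = {-3, 7, -1, 5, -3, 7, -1, 5};  /* row 3, k-group c1, broadcast */
  const int8_t bCDc1[8] = {9, -8, 6, 4, -2, 12, -7, 3};  /* 4 bytes col C, 4 bytes col D */

  const int8x8_t va3c1  = vld1_s8(a3c1);
  const int8x8_t vbCDc1 = vld1_s8(bCDc1);

  int32x4_t vacc3xCD = vdupq_n_s32(0);
  const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1);
  vacc3xCD = vpadalq_s16(vacc3xCD, vprod3xCDc1);

  /* Scalar reference: full int32 dot products over the 4-byte k-group. */
  int32_t refC = 0, refD = 0;
  for (int k = 0; k < 4; k++) {
    refC += (int32_t) bCDc1[k]     * (int32_t) a3c1[k];
    refD += (int32_t) bCDc1[4 + k] * (int32_t) a3c1[k];
  }

  /* With this packing, lanes 0+1 hold column C and lanes 2+3 hold column D. */
  const int32_t simdC = vgetq_lane_s32(vacc3xCD, 0) + vgetq_lane_s32(vacc3xCD, 1);
  const int32_t simdD = vgetq_lane_s32(vacc3xCD, 2) + vgetq_lane_s32(vacc3xCD, 3);
  printf("col C: simd=%d ref=%d | col D: simd=%d ref=%d\n",
         (int) simdC, (int) refC, (int) simdD, (int) refD);
  return 0;
}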