/external/XNNPACK/src/qs8-igemm/gen/

D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
      252  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      257  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      261  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      470  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      474  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      629  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      630  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
      244  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      249  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      253  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      458  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      462  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      617  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      618  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
      244  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      249  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      253  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      458  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      462  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      617  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      618  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
      234  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      239  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3x1);
      243  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      441  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      445  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
      202  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      206  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      361  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      362  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
      206  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      210  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      365  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      366  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
      202  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      206  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      361  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      362  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4s2-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
      195  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      199  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

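All of the MLAL variants above build vprod3xEFc0 with the same three-step sequence: a widening vmull_s8, a second widening multiply folded in with vmlal_s8, and a pairwise add-accumulate into the 32-bit accumulator with vpadalq_s16. The sketch below is a minimal, standalone illustration of that pattern; the helper name and its arguments are hypothetical and not part of XNNPACK.

    #include <arm_neon.h>

    /* Illustrative sketch (hypothetical helper, not an XNNPACK function) of the
       MULL -> MLAL -> PADAL sequence seen at e.g. lines 252/257/261 above.
       Two s8 x s8 widening multiplies are combined into one int16x8_t product,
       which is then pairwise-added into the running int32x4_t accumulator. */
    static inline int32x4_t qs8_mlal_accumulate(
        int32x4_t vacc,              /* 32-bit accumulator, e.g. vacc3xEF   */
        int8x8_t vb0, int8x8_t va0,  /* weights/activations, first k block  */
        int8x8_t vb1, int8x8_t va1)  /* weights/activations, second k block */
    {
      int16x8_t vprod = vmull_s8(vb0, va0);  /* widening multiply: s8*s8 -> s16 */
      vprod = vmlal_s8(vprod, vb1, va1);     /* widening multiply-accumulate    */
      return vpadalq_s16(vacc, vprod);       /* pairwise add-accumulate to s32  */
    }
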
/external/XNNPACK/src/qs8-gemm/gen/

D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
      227  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      232  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      236  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      441  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      445  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      600  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      601  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
      235  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      240  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      244  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      453  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      457  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      612  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      613  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
      227  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3c0x0);  (local)
      232  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3c0x1);
      236  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      441  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      445  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      600  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      601  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
      217  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      222  vprod3xEFc0 = vmlal_s8(vprod3xEFc0, vbEFc0x1, va3x1);
      226  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      424  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      428  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
      185  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      189  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      344  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      345  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
      185  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      189  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      344  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      345  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
      189  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      193  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);
      348  const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0);  (local)
      349  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

D | 4x16c4s2-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
      178  int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0x0, va3x0);  (local)
      182  vacc3xEF = vpadalq_s16(vacc3xEF, vprod3xEFc0);

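The MULL variants, and the remainder paths of the MLAL kernels, use the shorter two-step form of the same accumulation: a single widening vmull_s8 followed directly by vpadalq_s16. A minimal sketch, again with a hypothetical helper name:

    #include <arm_neon.h>

    /* Illustrative sketch (not an XNNPACK function) of the MULL -> PADAL form,
       e.g. lines 178/182 of 4x16c4s2-minmax-rndnu-neon-mull.c above: one
       widening multiply whose int16x8_t product is folded into the accumulator. */
    static inline int32x4_t qs8_mull_accumulate(
        int32x4_t vacc,            /* 32-bit accumulator, e.g. vacc3xEF     */
        int8x8_t vb, int8x8_t va)  /* 8 int8 weights and 8 int8 activations */
    {
      const int16x8_t vprod = vmull_s8(vb, va);  /* widening multiply: s8*s8 -> s16 */
      return vpadalq_s16(vacc, vprod);           /* pairwise add-accumulate to s32  */
    }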