/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 245 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 249 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 252 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 381 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 384 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 258 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 261 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 396 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 258 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 261 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 396 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 260 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 264 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 267 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 405 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 408 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 302 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 307 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 311 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 475 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 479 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 318 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 322 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 493 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 497 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 321 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 326 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 330 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 505 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 509 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 318 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 322 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 493 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 497 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mull.c | 187 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local 190 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 197 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local 200 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 194 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local 197 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 229 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 233 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 260 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 264 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 267 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 396 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 275 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 279 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 282 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 420 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 423 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 273 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 276 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 411 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 414 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 273 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 276 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 411 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 414 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 319 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 324 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 328 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 492 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 496 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 338 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 343 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 347 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 522 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 526 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 335 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 339 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 510 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 514 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 335 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 339 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 510 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 514 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mull.c | 202 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local 205 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 246 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 250 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
|
D | 3x16c4-minmax-rndnu-neon-mull-dup.c | 209 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local 212 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 209 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local 212 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 212 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local 215 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
|