/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 201 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 206 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 210 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 292 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 296 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 212 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 217 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 221 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 310 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 212 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 217 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 221 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 310 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 220 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 225 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 229 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 322 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 326 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 277 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 282 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 286 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 460 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 464 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 288 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 293 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 297 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 478 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 482 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 296 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 301 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 305 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 490 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 494 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 288 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 293 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 297 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 478 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 482 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 158 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 162 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 166 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 170 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 166 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 170 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 170 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 174 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 218 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 223 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 227 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 309 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 313 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 237 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 242 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 246 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 339 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 343 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 229 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 234 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 238 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 327 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 331 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 229 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 234 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 238 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 327 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 331 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 294 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 299 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 303 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 477 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 481 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 313 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 318 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 322 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 507 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 511 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 305 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 310 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 495 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 499 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 305 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 310 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 495 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 499 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 175 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 179 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 183 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 187 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 183 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 187 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 187 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 191 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 231 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 235 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
|