/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 188 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 193 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 197 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 284 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 288 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 199 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 204 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 208 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 302 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 306 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 199 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 204 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 208 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 302 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 306 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 207 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 212 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 216 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 314 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 318 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 264 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 269 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 273 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 452 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 456 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 275 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 280 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 284 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 470 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 474 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 283 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 288 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 292 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 482 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 486 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 275 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 280 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 284 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 470 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 474 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 150 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 154 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 158 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 162 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 158 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 162 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 162 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 166 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 205 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 210 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 214 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 301 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 305 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 224 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 229 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 233 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 331 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 335 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 216 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 221 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 225 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 319 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 323 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 216 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 221 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 225 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 319 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 323 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 281 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 286 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 290 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 469 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 473 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 300 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 305 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 309 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 499 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 503 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 292 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 297 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 301 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 487 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 491 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 292 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 297 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 301 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 487 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 491 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 167 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 171 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 175 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 179 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 175 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 179 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 179 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 183 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 223 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 227 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
|