/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 137 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 142 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 146 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 278 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 282 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 387 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 388 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 137 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 142 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 146 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 278 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 282 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 387 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 388 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 161 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 166 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 170 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 314 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 318 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 423 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 424 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 145 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 150 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 154 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 290 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 294 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 399 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 400 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 128 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 133 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 137 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 260 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 264 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld1r.c | 129 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 133 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() 238 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 239 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld4r.c | 117 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 121 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() 226 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 227 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mull-dup.c | 117 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 121 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() 226 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 227 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld2r.c | 121 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 125 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() 230 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 231 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 179 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 184 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 188 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 422 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 426 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 611 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 612 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 187 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 192 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 196 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 434 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 438 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 623 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 624 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 179 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 184 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 188 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 422 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 426 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 611 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 612 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 155 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 160 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 164 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 296 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 300 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 405 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 406 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 179 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 184 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 188 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 332 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 336 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 441 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 442 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 163 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 168 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 172 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 308 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 312 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 417 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 418 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 155 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 160 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 164 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 296 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 300 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 405 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 406 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 145 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 150 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 154 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 277 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 281 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld4r.c | 134 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 138 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() 243 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 244 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld2r.c | 138 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 142 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() 247 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 248 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld1r.c | 146 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 150 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() 255 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 256 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mull-dup.c | 134 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 138 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() 243 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 244 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 205 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 210 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 214 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 452 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 456 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 641 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 642 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 221 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 226 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 230 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 476 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 480 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 665 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 666 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 197 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 202 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 206 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 440 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 444 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 629 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 630 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 197 int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 202 vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 206 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 440 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 444 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 629 const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 630 vacc3x0123 = vpadalq_s16(vacc3x0123, vprod3x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|