Home
last modified time | relevance | path

Searched refs:vprod3x45c1 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c188 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
193 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
197 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
284 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
288 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c199 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
204 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
208 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
302 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
306 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c199 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
204 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
208 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
302 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
306 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c207 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
212 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
216 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
314 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
318 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x16c4s2-minmax-rndnu-neon-mlal.c264 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
269 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
273 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
452 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
456 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-dup.c275 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
280 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
284 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
470 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
474 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c283 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
288 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
292 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
482 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
486 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c275 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
280 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
284 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
470 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
474 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c150 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
154 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c158 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
162 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c158 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
162 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c162 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
166 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
/external/XNNPACK/src/qs8-igemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c205 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
210 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
214 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
301 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
305 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c224 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
229 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
233 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
331 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
335 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c216 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
221 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
225 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
319 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
323 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c216 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
221 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
225 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
319 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
323 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x16c4s2-minmax-rndnu-neon-mlal.c281 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
286 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
290 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
469 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
473 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c300 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
305 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
309 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
499 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
503 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c292 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
297 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
301 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
487 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
491 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c292 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
297 vprod3x45c1 = vmlal_s8(vprod3x45c1, vb45c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
301 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
487 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
491 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c167 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
171 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c175 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
179 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c175 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
179 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c179 const int16x8_t vprod3x45c1 = vmull_s8(vb45c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
183 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4s2-minmax-rndnu-neon-mull.c223 int16x8_t vprod3x45c1 = vmull_s8(vb45c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
227 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()

12