Home
last modified time | relevance | path

Searched refs:vprod3x67c1 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c201 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
206 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
210 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
292 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
296 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c212 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
217 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
221 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
310 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c212 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
217 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
221 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
310 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c220 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
225 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
229 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
322 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
326 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x16c4s2-minmax-rndnu-neon-mlal.c277 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
282 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
286 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
460 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
464 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-dup.c288 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
293 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
297 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
478 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
482 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c296 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
301 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
305 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
490 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
494 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c288 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
293 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
297 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
478 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
482 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c158 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
162 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c166 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
170 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c166 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
170 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c170 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
174 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
/external/XNNPACK/src/qs8-igemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c218 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
223 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
227 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
309 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
313 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c237 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
242 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
246 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
339 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
343 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c229 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
234 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
238 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
327 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
331 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c229 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
234 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
238 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
327 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
331 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x16c4s2-minmax-rndnu-neon-mlal.c294 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
299 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
303 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
477 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
481 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c313 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
318 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
322 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
507 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
511 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c305 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
310 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
495 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
499 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c305 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
310 vprod3x67c1 = vmlal_s8(vprod3x67c1, vb67c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
314 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
495 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
499 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c175 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
179 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c183 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
187 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c183 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
187 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c187 const int16x8_t vprod3x67c1 = vmull_s8(vb67c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
191 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4s2-minmax-rndnu-neon-mull.c231 int16x8_t vprod3x67c1 = vmull_s8(vb67c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
235 vacc3x67 = vpadalq_s16(vacc3x67, vprod3x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()

12