Home
last modified time | relevance | path

Searched refs:vprod2xABc1 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c4s2-minmax-rndnu-neon-mlal.c245 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
249 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
252 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
381 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
384 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
D3x16c4-minmax-rndnu-neon-mlal-ld2r.c254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
258 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
261 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
396 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
D3x16c4-minmax-rndnu-neon-mlal-dup.c254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
258 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
261 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
396 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
D3x16c4-minmax-rndnu-neon-mlal-ld1r.c260 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
264 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
267 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
405 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
408 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
D4x16c4s2-minmax-rndnu-neon-mlal.c302 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
307 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
311 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
475 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
479 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-dup.c313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
318 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
322 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
493 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
497 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c321 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
326 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
330 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
505 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
509 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
318 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
322 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
493 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
497 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D3x16c4s2-minmax-rndnu-neon-mull.c187 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
190 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c197 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
200 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c194 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
197 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D4x16c4s2-minmax-rndnu-neon-mull.c229 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
233 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c4s2-minmax-rndnu-neon-mlal.c260 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
264 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
267 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
396 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
399 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
D3x16c4-minmax-rndnu-neon-mlal-ld1r.c275 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
279 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
282 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
420 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
423 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
D3x16c4-minmax-rndnu-neon-mlal-ld2r.c269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
273 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
276 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
411 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
414 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
D3x16c4-minmax-rndnu-neon-mlal-dup.c269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
273 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
276 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
411 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
414 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
D4x16c4s2-minmax-rndnu-neon-mlal.c319 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
324 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
328 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
492 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
496 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c338 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
343 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
347 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
522 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
526 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
335 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
339 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
510 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
514 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
335 vprod2xABc1 = vmlal_s8(vprod2xABc1, vbABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
339 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
510 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
514 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D3x16c4s2-minmax-rndnu-neon-mull.c202 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull() local
205 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull()
D4x16c4s2-minmax-rndnu-neon-mull.c246 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
250 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D3x16c4-minmax-rndnu-neon-mull-dup.c209 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
212 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
D3x16c4-minmax-rndnu-neon-mull-ld2r.c209 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
212 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
D3x16c4-minmax-rndnu-neon-mull-ld1r.c212 const int16x8_t vprod2xABc1 = vmull_s8(vbABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
215 vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()

12