Home
last modified time | relevance | path

Searched refs:vprod3x01c1 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c162 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
167 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
171 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
268 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
272 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c173 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
178 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
182 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
286 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
290 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c173 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
178 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
182 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
286 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
290 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c181 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
186 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
190 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
298 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
302 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x16c4s2-minmax-rndnu-neon-mlal.c238 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
243 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
247 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
436 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
440 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-dup.c249 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
254 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
258 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
454 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
458 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c257 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
262 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
266 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
466 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
470 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c249 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
254 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
258 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
454 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
458 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c134 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
138 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c142 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
146 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c142 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
146 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c146 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
150 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
/external/XNNPACK/src/qs8-igemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c179 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
184 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
188 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
285 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
289 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c198 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
203 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
207 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
315 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
319 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c190 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
195 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
199 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
303 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
307 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c190 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
195 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
199 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
303 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
307 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x16c4s2-minmax-rndnu-neon-mlal.c255 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
260 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
264 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
453 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local
457 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c274 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
279 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
283 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
483 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
487 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c266 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
271 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
275 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
471 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
475 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c266 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
271 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
275 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
471 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
475 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4s2-minmax-rndnu-neon-mull.c151 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
155 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c159 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
163 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c159 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
163 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c163 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
167 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4s2-minmax-rndnu-neon-mull.c207 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
211 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()

12