Home
last modified time | relevance | path

Searched refs:vprod1xCDEFc0 (Results 1 – 25 of 60) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D2x16c2-minmax-rndnu-neon-mlal-ld1r.c135 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
138 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
140 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
287 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
289 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
376 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
377 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
D2x16c2-minmax-rndnu-neon-mlal-ld4r.c123 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
126 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
128 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
269 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
271 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
358 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
359 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
D2x16c2-minmax-rndnu-neon-mlal-dup.c123 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
126 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
128 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
269 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
271 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
358 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
359 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
D2x16c2-minmax-rndnu-neon-mlal-ld2r.c127 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
130 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
132 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
275 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
277 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
364 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
365 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
D2x16c2s4-minmax-rndnu-neon-mlal.c118 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
121 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
123 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
257 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
259 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
D3x16c2-minmax-rndnu-neon-mlal-ld4r.c148 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
152 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
155 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
347 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
350 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
466 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
467 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
D3x16c2-minmax-rndnu-neon-mlal-dup.c148 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
152 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
155 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
347 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
350 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
466 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
467 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
D3x16c2-minmax-rndnu-neon-mlal-ld1r.c166 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
170 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
173 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
374 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
377 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
493 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
494 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
D3x16c2-minmax-rndnu-neon-mlal-ld2r.c154 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
158 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
161 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
356 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
359 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
475 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
476 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
D3x16c2s4-minmax-rndnu-neon-mlal.c141 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
145 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
148 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
332 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
335 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
D2x16c2-minmax-rndnu-neon-mull-ld1r.c117 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local
119 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
206 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local
207 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
D2x16c2-minmax-rndnu-neon-mull-dup.c111 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local
113 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
200 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local
201 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
D2x16c2-minmax-rndnu-neon-mull-ld4r.c111 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local
113 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
200 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local
201 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
/external/XNNPACK/src/qs8-gemm/gen/
D2x16c2-minmax-rndnu-neon-mlal-ld2r.c113 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
116 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
118 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
261 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
263 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
350 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local
351 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
D2x16c2-minmax-rndnu-neon-mlal-ld1r.c121 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
124 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
126 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
273 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
275 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
362 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local
363 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
D2x16c2-minmax-rndnu-neon-mlal-dup.c109 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
112 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
114 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
255 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
257 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
344 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local
345 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
D2x16c2-minmax-rndnu-neon-mlal-ld4r.c109 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
112 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
114 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
255 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
257 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
344 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local
345 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
D2x16c2s4-minmax-rndnu-neon-mlal.c105 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
108 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
110 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
244 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
246 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
D3x16c2-minmax-rndnu-neon-mlal-dup.c132 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
136 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
139 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
331 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
334 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
450 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
451 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
D3x16c2-minmax-rndnu-neon-mlal-ld4r.c132 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
136 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
139 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
331 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
334 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
450 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local
451 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
D3x16c2-minmax-rndnu-neon-mlal-ld2r.c138 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
142 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
145 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
340 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
343 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
459 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local
460 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
D3x16c2-minmax-rndnu-neon-mlal-ld1r.c150 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
154 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
157 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
358 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
361 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
477 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local
478 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
D3x16c2s4-minmax-rndnu-neon-mlal.c126 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
130 vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
133 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
317 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
320 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
D2x16c2-minmax-rndnu-neon-mull-dup.c98 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local
100 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
187 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local
188 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
D2x16c2-minmax-rndnu-neon-mull-ld1r.c104 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local
106 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
193 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local
194 vacc1xCDEF = vpadalq_s16(vacc1xCDEF, vprod1xCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()

123