Home
last modified time | relevance | path

Searched refs: vacc3x23 (Results 1 – 25 of 40) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D4x8c4-minmax-rndnu-neon-mull-ld2r.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
117 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
154 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
217 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
231 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
253 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
117 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
154 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
217 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
231 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
253 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
121 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
158 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
221 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
235 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
257 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x8c4s2-minmax-rndnu-neon-mlal.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
124 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
184 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
244 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
280 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
307 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
329 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4s2-minmax-rndnu-neon-mull.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
110 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
146 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
174 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
196 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
134 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
195 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
261 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
298 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
361 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
375 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
397 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
134 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
195 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
261 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
298 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
361 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
375 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
397 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c75 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
142 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
203 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
273 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
310 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
373 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
387 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
409 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mull-ld2r.c87 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
141 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
210 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
333 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
361 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
403 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
D4x16c4-minmax-rndnu-neon-mull-dup.c87 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
141 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
210 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
333 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
361 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
403 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
D4x16c4-minmax-rndnu-neon-mull-ld1r.c87 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
145 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
214 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
337 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
365 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
407 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
D4x16c4s2-minmax-rndnu-neon-mull.c87 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
134 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
202 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
268 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
310 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
/external/XNNPACK/src/qs8-igemm/gen/
D4x8c4s2-minmax-rndnu-neon-mlal.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local
141 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
201 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
261 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
297 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
327 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
349 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
D4x8c4s2-minmax-rndnu-neon-mull.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local
127 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
163 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
194 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
216 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
159 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
220 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
290 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
327 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
390 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
406 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
428 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
151 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
212 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
278 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
315 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
378 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
394 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
416 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
151 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
212 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
278 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
315 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
378 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
394 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
416 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mull-ld2r.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
134 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
171 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
234 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
250 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
272 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
134 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
171 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
234 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
250 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
272 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c72 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
138 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
175 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
238 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
254 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
276 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4-minmax-rndnu-neon-mull-ld2r.c84 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
158 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
227 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
350 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
380 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
422 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
D4x16c4-minmax-rndnu-neon-mull-ld1r.c84 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
162 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
231 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
354 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
384 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
426 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
D4x16c4-minmax-rndnu-neon-mull-dup.c84 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
158 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
227 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
350 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
380 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
422 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
D4x16c4s2-minmax-rndnu-neon-mull.c84 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local
151 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
219 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
288 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
330 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c84 int32x4_t vacc3x23 = vacc0x23; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
183 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
296 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
426 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
495 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
618 vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
648 int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
690 const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()

12