
Searched refs:vacc1xCD (Results 1 – 25 of 48) sorted by relevance


/external/XNNPACK/src/qs8-gemm/gen/
2x16c4-minmax-rndnu-neon-mull-dup.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
121 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
156 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
209 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
222 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
245 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
123 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
158 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
211 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
224 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
247 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
2x16c4-minmax-rndnu-neon-mull-ld2r.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
121 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
156 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
209 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
222 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
245 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
2x16c4s2-minmax-rndnu-neon-mull.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
116 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
150 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
167 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
190 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
2x16c4-minmax-rndnu-neon-mlal-ld1r.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local
149 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
210 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
275 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
310 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
363 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
376 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
399 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
2x16c4-minmax-rndnu-neon-mlal-dup.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local
145 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
206 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
269 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
304 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
357 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
370 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
393 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
2x16c4-minmax-rndnu-neon-mlal-ld2r.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local
145 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
206 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
269 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
304 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
357 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
370 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
393 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
2x16c4s2-minmax-rndnu-neon-mlal.c
64 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local
139 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
199 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
258 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
292 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
308 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
331 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
3x16c4s2-minmax-rndnu-neon-mlal.c
70 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
175 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
261 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
338 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
389 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
408 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
435 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
3x16c4-minmax-rndnu-neon-mull-ld1r.c
70 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
153 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
205 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
262 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
292 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
319 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
3x16c4-minmax-rndnu-neon-mull-ld2r.c
70 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
150 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
202 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
259 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
289 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
316 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
3x16c4-minmax-rndnu-neon-mull-dup.c
70 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
150 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
202 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
259 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
289 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
316 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
/external/XNNPACK/src/qs8-igemm/gen/
2x16c4-minmax-rndnu-neon-mull-ld2r.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
134 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
169 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
222 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
237 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
260 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
2x16c4s2-minmax-rndnu-neon-mull.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
129 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
163 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
183 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
206 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
2x16c4-minmax-rndnu-neon-mlal-dup.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local
158 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
219 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
282 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
317 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
370 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
385 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
408 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
2x16c4-minmax-rndnu-neon-mlal-ld1r.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local
162 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
223 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
288 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
323 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
376 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
391 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
414 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
2x16c4-minmax-rndnu-neon-mlal-ld2r.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local
158 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
219 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
282 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
317 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
370 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
385 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
408 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
2x16c4-minmax-rndnu-neon-mull-dup.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
134 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
169 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
222 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
237 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
260 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
136 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
171 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
224 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
239 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
262 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
2x16c4s2-minmax-rndnu-neon-mlal.c
65 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local
152 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
212 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
271 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
305 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
324 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
347 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
3x16c4s2-minmax-rndnu-neon-mlal.c
69 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
190 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
276 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
353 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
404 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
426 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
453 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
3x16c4-minmax-rndnu-neon-mull-dup.c
69 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
165 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
217 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
274 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
306 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
333 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
3x16c4-minmax-rndnu-neon-mull-ld2r.c
69 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
165 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
217 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
274 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
306 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
333 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
3x16c4-minmax-rndnu-neon-mull-ld1r.c
69 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
168 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
220 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
277 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
309 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
336 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
3x16c4-minmax-rndnu-neon-mlal-ld1r.c
69 int32x4_t vacc1xCD = vacc0xCD; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
204 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
291 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
376 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
428 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
485 vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
517 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
544 const int32x2_t vsum1xCD = vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
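Note on the pattern in these matches: in the generated qs8 microkernels, vacc1xCD names the int32x4_t accumulator for output row 1, columns 0xC and 0xD of the 16-wide tile. Every file above follows the same sequence: the accumulator is initialized by copying row 0's accumulator (int32x4_t vacc1xCD = vacc0xCD), int8 products widened to int16 are folded in with vpadalq_s16, and per-column sums are then produced either with vpaddq_s32 (pairing CD with EF) or with vpadd_s32 on the split halves. The listing below is a minimal standalone sketch of that accumulate-and-reduce sequence, not code taken from XNNPACK: the toy inputs, the zero bias, and the reuse of vacc1xCD as a stand-in vacc1xEF are illustrative assumptions, and vpaddq_s32 requires an AArch64 NEON target (the ARMv7 path in these kernels reduces with vpadd_s32 instead).

/* sketch.c - illustrative only; build with: gcc -O2 sketch.c (AArch64) */
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* Toy data: 8 int8 values of row 1 of A against 4 weights for column C
     followed by 4 weights for column D (c4-style packing assumed). */
  const int8_t va1[8]  = {1, 2, 3, 4, 5, 6, 7, 8};
  const int8_t vbCD[8] = {1, 1, 1, 1, 2, 2, 2, 2};

  /* Four int32 partial sums: lanes 0-1 for column C, lanes 2-3 for D.
     The real kernels copy this from vacc0xCD; zero bias is assumed here. */
  int32x4_t vacc1xCD = vdupq_n_s32(0);

  /* int8 x int8 -> int16 widening multiply (the vprod1xCDc0 step). */
  const int16x8_t vprod1xCDc0 = vmull_s8(vld1_s8(vbCD), vld1_s8(va1));

  /* Pairwise widen-and-accumulate into int32: the line the search hits. */
  vacc1xCD = vpadalq_s16(vacc1xCD, vprod1xCDc0);

  /* AArch64 reduction: pair CD with EF to get one sum per column C..F.
     vacc1xEF is reused from vacc1xCD purely to keep the sketch short. */
  const int32x4_t vacc1xEF = vacc1xCD;
  const int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF);

  /* vpadd_s32-based reduction of the same accumulator (ARMv7-compatible). */
  const int32x2_t vsum1xCD =
      vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD));

  printf("vacc1xCDEF = {%d %d %d %d}\n",
         vgetq_lane_s32(vacc1xCDEF, 0), vgetq_lane_s32(vacc1xCDEF, 1),
         vgetq_lane_s32(vacc1xCDEF, 2), vgetq_lane_s32(vacc1xCDEF, 3));
  printf("vsum1xCD   = {%d %d}\n",
         vget_lane_s32(vsum1xCD, 0), vget_lane_s32(vsum1xCD, 1));
  return 0;
}

With these toy inputs the int16 products are {1,2,3,4,10,12,14,16}, vpadalq_s16 yields {3,7,22,30}, and both reductions collapse that to one int32 per column: {10, 52}.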
