Searched refs:vacc1xEF (Results 1 – 25 of 48) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
2x16c4-minmax-rndnu-neon-mull-dup.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
125 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
160 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
211 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
222 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
246 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
127 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
162 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
213 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
224 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
248 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
2x16c4-minmax-rndnu-neon-mull-ld2r.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
125 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
160 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
211 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
222 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
246 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
2x16c4s2-minmax-rndnu-neon-mull.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
120 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
154 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
167 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
191 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
2x16c4-minmax-rndnu-neon-mlal-ld1r.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local
156 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
217 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
279 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
314 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
365 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
376 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
400 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
2x16c4-minmax-rndnu-neon-mlal-dup.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local
152 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
213 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
273 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
308 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
359 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
370 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
394 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
2x16c4-minmax-rndnu-neon-mlal-ld2r.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local
152 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
213 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
273 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
308 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
359 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
370 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
394 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
2x16c4s2-minmax-rndnu-neon-mlal.c:65 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local
146 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
206 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
262 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
296 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
308 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
332 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
3x16c4s2-minmax-rndnu-neon-mlal.c:71 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
185 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
271 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
344 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
395 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
408 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
436 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
3x16c4-minmax-rndnu-neon-mull-ld1r.c:71 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
159 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
211 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
264 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
292 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
320 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
3x16c4-minmax-rndnu-neon-mull-ld2r.c:71 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
156 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
208 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
261 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
289 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
317 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
3x16c4-minmax-rndnu-neon-mull-dup.c:71 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
156 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
208 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
261 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
289 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
317 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
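Every qs8-gemm match above works on the same accumulator: vacc1xEF holds the int32 running sums for output columns 0xE-0xF of row 1, starts as a copy of the row-0 accumulator (vacc0xEF), and grows by widening pairwise accumulation. A minimal sketch of that accumulate step, assuming hypothetical inputs va1 (a row-1 activation group) and vbEF (the matching weight group for columns E..F) in place of XNNPACK's actual packed-buffer loads:

    #include <arm_neon.h>

    /* Sketch only: va1 and vbEF are illustrative stand-ins, not names
       from the generated kernels. */
    static inline int32x4_t accumulate_1xEF(int32x4_t vacc1xEF,
                                            int8x8_t va1, int8x8_t vbEF) {
      /* Widening multiply: eight int8*int8 products, kept as int16. */
      const int16x8_t vprod1xEFc0 = vmull_s8(vbEF, va1);
      /* Pairwise-add adjacent int16 products and accumulate into the
         four int32 lanes -- the vpadalq_s16 call every hit points at. */
      return vpadalq_s16(vacc1xEF, vprod1xEFc0);
    }

The mlal variants in the listing fold a second product into vprod1xEFc0 with vmlal_s8 before the single vpadalq_s16, halving the number of pairwise-accumulate steps per loop iteration. The qs8-igemm results below repeat the same pattern for the indirect-GEMM kernels.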
/external/XNNPACK/src/qs8-igemm/gen/
2x16c4-minmax-rndnu-neon-mull-ld2r.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
138 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
173 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
224 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
237 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
261 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
2x16c4s2-minmax-rndnu-neon-mull.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local
133 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
167 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
183 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
207 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
2x16c4-minmax-rndnu-neon-mlal-dup.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local
165 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
226 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
286 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
321 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
372 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
385 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
409 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
2x16c4-minmax-rndnu-neon-mlal-ld1r.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local
169 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
230 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
292 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
327 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
378 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
391 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
415 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
2x16c4-minmax-rndnu-neon-mlal-ld2r.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local
165 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
226 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
286 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
321 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
372 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
385 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
409 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
2x16c4-minmax-rndnu-neon-mull-dup.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
138 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
173 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
224 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
237 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
261 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
2x16c4-minmax-rndnu-neon-mull-ld1r.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
140 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
175 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
226 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
239 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
263 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
2x16c4s2-minmax-rndnu-neon-mlal.c:66 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local
159 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
219 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
275 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
309 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
324 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
348 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
3x16c4s2-minmax-rndnu-neon-mlal.c:70 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local
200 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
286 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
359 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
410 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
426 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
454 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
3x16c4-minmax-rndnu-neon-mull-dup.c:70 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local
171 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
223 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
276 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
306 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
334 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
3x16c4-minmax-rndnu-neon-mull-ld2r.c:70 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local
171 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
223 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
276 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
306 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
334 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
3x16c4-minmax-rndnu-neon-mull-ld1r.c:70 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local
174 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
226 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
279 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
309 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
337 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
3x16c4-minmax-rndnu-neon-mlal-ld1r.c:70 int32x4_t vacc1xEF = vacc0xEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
214 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
301 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
382 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
434 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
487 vacc1xEF = vpadalq_s16(vacc1xEF, vprod1xEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
517 int32x4_t vacc1xCDEF = vpaddq_s32(vacc1xCD, vacc1xEF); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
545 const int32x2_t vsum1xEF = vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
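Both listings end in the same reduction: the per-column-pair accumulators are pairwise-added down to one int32 per output column, via vpaddq_s32 on AArch64 and a vpadd_s32 split of the low/high halves elsewhere (the vsum1xEF lines). A minimal sketch of that tail, assuming the same two-accumulator shape; the generated files guard the paths with XNNPACK's own arch macros, approximated here with __aarch64__:

    #include <arm_neon.h>

    /* Sketch only: reduces the CD and EF column-pair accumulators of
       row 1 to one int32 per output column C, D, E, F. */
    static inline int32x4_t reduce_1xCDEF(int32x4_t vacc1xCD,
                                          int32x4_t vacc1xEF) {
    #if defined(__aarch64__)
      /* One pairwise add across both accumulators (the vpaddq_s32 hit). */
      return vpaddq_s32(vacc1xCD, vacc1xEF);
    #else
      /* AArch32 has no vpaddq_s32: pairwise-add each half separately
         (the vpadd_s32/vget_low_s32/vget_high_s32 hits), then combine. */
      const int32x2_t vsum1xCD =
          vpadd_s32(vget_low_s32(vacc1xCD), vget_high_s32(vacc1xCD));
      const int32x2_t vsum1xEF =
          vpadd_s32(vget_low_s32(vacc1xEF), vget_high_s32(vacc1xEF));
      return vcombine_s32(vsum1xCD, vsum1xEF);
    #endif
    }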
