
Searched refs:vacc1x67 (Results 1 – 25 of 145) sorted by relevance
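All of the hits trace the same accumulator through XNNPACK's generated 2x8c4 GEMM/IGEMM microkernels: in XNNPACK's naming, vacc1x67 holds the widened int32 partial sums for output columns 6 and 7 of row 1. It is seeded from vacc0x67, grown by pairwise-accumulating int8x int8 products with vpadalq_s16, and finally folded to one sum per column with vpaddq_s32 (AArch64) or vpadd_s32 (AArch32). The sketch below only illustrates that pattern; it is not an excerpt of any listed file, and the helper names accumulate_block/reduce_columns and the operand names va1c0, vb67c0, etc. are invented for the example.

#include <arm_neon.h>

/*
 * Illustrative sketch (not XNNPACK source): one k-block of the c4 inner loop
 * and the final column reduction, showing how vacc1x67 is used in the hits
 * listed below.
 */
static int32x4_t accumulate_block(int32x4_t vacc1x67,
                                  int8x8_t va1c0, int8x8_t vb67c0,
                                  int8x8_t va1c1, int8x8_t vb67c1) {
  /* int8 x int8 -> int16 products for columns 6 and 7 (two c4 sub-blocks). */
  const int16x8_t vprod1x67c0 = vmull_s8(vb67c0, va1c0);
  const int16x8_t vprod1x67c1 = vmull_s8(vb67c1, va1c1);
  /* Pairwise-add adjacent int16 products into the int32 accumulator;
     this is the vpadalq_s16 line that repeats throughout the hits. */
  vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0);
  vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1);
  return vacc1x67;
}

/* After the k loop each accumulator still holds two partial sums per output
   column; fold vacc1x45 and vacc1x67 into one int32 per column
   (the vacc1x4567 value in the kernels). */
static int32x4_t reduce_columns(int32x4_t vacc1x45, int32x4_t vacc1x67) {
#if defined(__aarch64__)
  /* AArch64 path: the vpaddq_s32 lines in the hits. */
  return vpaddq_s32(vacc1x45, vacc1x67);
#else
  /* AArch32 path: the vpadd_s32 / vsum1x67 lines in the hits. */
  const int32x2_t vsum1x45 = vpadd_s32(vget_low_s32(vacc1x45), vget_high_s32(vacc1x45));
  const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67));
  return vcombine_s32(vsum1x45, vsum1x67);
#endif
}

The deferred widening is the point of the MLAL-style kernels: products stay in int16 and are only pairwise-accumulated into int32, which packs more multiply-accumulates into each instruction than widening to int32 immediately would.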


/external/XNNPACK/src/qc8-gemm/gen/
2x8c4-minmax-fp32-neonv8-mlal-dup.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup() local
109 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
142 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
178 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
197 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
228 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
235 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
247 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
2x8c4-minmax-fp32-neon-mlal-ld2r.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r() local
108 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
141 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
177 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
227 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
234 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
246 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
2x8c4-minmax-fp32-neon-mlal-ld1r.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r() local
112 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
145 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
183 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
202 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
233 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
240 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
252 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
2x8c4-minmax-fp32-neonv8-mlal-ld2r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r() local
109 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
142 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
178 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
197 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
228 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
235 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
247 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
2x8c4-minmax-fp32-neonv8-mlal-ld1r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r() local
113 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
146 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
184 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
203 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
234 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
241 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
253 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
2x8c4-minmax-fp32-neon-mlal-dup.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup() local
108 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
141 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
177 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
227 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
234 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
246 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
/external/XNNPACK/src/qs8-igemm/gen/
2x8c4-minmax-rndnu-neon-mlal-ld1r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r() local
125 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
158 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
215 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
246 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
255 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
267 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r()
2x8c4-minmax-rndnu-neon-mlal-ld2r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r()
2x8c4-minmax-rndnu-neon-mlal-dup.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
2x8c4-minmax-fp32-neon-mlal-ld2r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
2x8c4-minmax-fp32-neonv8-mlal-dup.c
59 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup() local
122 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
155 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
191 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
210 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
241 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
250 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
262 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
2x8c4-minmax-fp32-neon-mlal-ld1r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r() local
125 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
158 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
215 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
246 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
255 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
267 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
2x8c4-minmax-fp32-neon-mlal-dup.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
/external/XNNPACK/src/qc8-igemm/gen/
2x8c4-minmax-fp32-neon-mlal-ld1r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r() local
125 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
158 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
215 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
246 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
255 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
267 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r()
2x8c4-minmax-fp32-neonv8-mlal-dup.c
59 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup() local
122 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
155 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
191 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
210 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
241 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
250 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
262 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
2x8c4-minmax-fp32-neonv8-mlal-ld2r.c
59 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r() local
122 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
155 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
191 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
210 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
241 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
250 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
262 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
2x8c4-minmax-fp32-neonv8-mlal-ld1r.c
59 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r() local
126 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
159 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
197 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
216 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
247 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
256 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
268 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
2x8c4-minmax-fp32-neon-mlal-dup.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
2x8c4-minmax-fp32-neon-mlal-ld2r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r() local
121 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
154 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
190 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
209 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
240 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
249 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
261 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
/external/XNNPACK/src/qs8-gemm/gen/
2x8c4-minmax-fp32-neonv8-mlal-dup.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup() local
109 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
142 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
178 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
197 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
228 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
235 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
247 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup()
2x8c4-minmax-rndnu-neon-mlal-dup.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup() local
108 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
141 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
177 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
227 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
234 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
246 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup()
2x8c4-minmax-fp32-neon-mlal-dup.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup() local
108 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
141 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
177 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
227 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
234 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
246 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup()
2x8c4-minmax-fp32-neonv8-mlal-ld2r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r() local
109 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
142 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
178 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
197 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
228 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
235 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
247 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r()
2x8c4-minmax-fp32-neon-mlal-ld2r.c
57 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r() local
108 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
141 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
177 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
196 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
227 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
234 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
246 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r()
2x8c4-minmax-fp32-neonv8-mlal-ld1r.c
58 int32x4_t vacc1x67 = vacc0x67; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r() local
113 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
146 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
184 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
203 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
234 vacc1x67 = vpadalq_s16(vacc1x67, vprod1x67c0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
241 int32x4_t vacc1x4567 = vpaddq_s32(vacc1x45, vacc1x67); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()
253 const int32x2_t vsum1x67 = vpadd_s32(vget_low_s32(vacc1x67), vget_high_s32(vacc1x67)); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r()