Home
last modified time | relevance | path

Searched refs:vsum1x23 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mull-padal.c124 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
129 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
151 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
152 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c140 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
145 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
167 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
168 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c199 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
204 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
226 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
227 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c155 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
164 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
188 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
189 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c179 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
188 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
212 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
213 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c186 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
199 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
225 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
226 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c184 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
195 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
233 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
234 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c256 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
265 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
289 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
290 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c216 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
227 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
265 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
266 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c218 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
231 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
257 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
258 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c313 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
326 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
352 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
353 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c239 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
258 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
300 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
301 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c323 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
334 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
372 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
373 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mull-padal.c140 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
145 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
167 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
168 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c156 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
161 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
183 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
184 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c215 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
220 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
242 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
243 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c173 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
182 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
206 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
207 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c197 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
206 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
230 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
231 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c206 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
219 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
245 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
246 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c200 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
211 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
249 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
250 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c274 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
283 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
307 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
308 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c238 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
251 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
277 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
278 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c232 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
243 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
281 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
282 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c257 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
276 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
318 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
319 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c339 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
350 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
388 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
389 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

12