Home
last modified time | relevance | path

Searched refs:vsum1x01 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mull-padal.c123 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
129 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
150 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
152 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c139 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
145 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
166 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
168 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c198 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
204 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
225 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
227 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c154 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
164 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
187 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
189 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c178 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
188 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
211 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
213 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c185 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
199 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
224 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
226 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c183 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
195 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
232 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
234 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c255 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
265 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
288 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
290 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c215 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
227 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
264 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
266 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c217 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
231 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
256 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
258 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c312 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
326 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
351 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
353 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c238 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
258 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
299 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
301 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c322 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
334 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
371 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
373 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mull-padal.c139 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
145 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
166 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
168 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c155 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
161 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
182 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
184 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c214 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
220 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
241 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
243 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c172 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
182 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
205 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
207 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c196 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
206 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
229 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
231 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c205 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
219 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
244 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
246 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c199 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
211 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
248 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
250 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c273 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
283 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
306 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
308 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c237 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
251 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
276 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
278 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c231 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
243 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
280 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
282 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c256 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
276 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
317 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
319 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c338 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
350 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
387 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
389 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

12