Home
last modified time | relevance | path

Searched refs:vpadd_s32 (Results 1 – 25 of 71) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D4x16c8-minmax-neon-mull-padal.c358 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
359 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
365 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
366 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
372 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
373 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
379 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
380 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
386 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
387 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c289 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
290 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
296 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
297 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
303 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
304 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
310 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
311 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
317 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
318 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c230 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
231 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
237 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
238 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
244 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
245 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
251 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
252 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
258 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
259 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c220 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
221 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
227 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
228 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
234 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
235 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
241 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
242 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
248 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
249 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c422 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
423 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
429 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
430 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
436 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
437 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
443 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
444 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
450 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
451 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c337 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
338 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
344 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
345 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
351 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
352 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
358 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
359 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
365 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
366 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c191 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
192 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
198 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
199 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
205 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
206 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
212 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
213 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
219 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
220 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c262 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
263 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
269 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
270 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
276 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
277 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
283 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
284 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
290 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
291 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c215 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
216 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
222 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
223 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
229 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
230 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
236 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
237 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
243 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
244 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c252 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
253 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
259 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
260 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
266 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
267 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
273 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
274 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
280 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
281 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x16c8-minmax-neon-mlal-padal.c597 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
598 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
604 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
605 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
611 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
612 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
618 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
619 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
625 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
626 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x16c8-minmax-neon-mull-padal.c338 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
339 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
345 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
346 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
352 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
353 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
359 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
360 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
366 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
367 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c271 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
272 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
278 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
279 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
285 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
286 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
292 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
293 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
299 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
300 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c210 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
211 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
217 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
218 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
224 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
225 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
231 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
232 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
238 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
239 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c204 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
205 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
211 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
212 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
218 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
219 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
225 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
226 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
232 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
233 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c402 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
403 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
409 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
410 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
416 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
417 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
423 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
424 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
430 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
431 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c319 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
320 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
326 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
327 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
333 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
334 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
340 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
341 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
347 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
348 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c173 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
174 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
180 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
181 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
187 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
188 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
194 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
195 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
201 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
202 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c236 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
237 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
243 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
244 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
250 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
251 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
257 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
258 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
264 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
265 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c242 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
243 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
249 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
250 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
256 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
257 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
263 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
264 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
270 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
271 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D2x8c8-minmax-neon-mull-padal.c136 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
137 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
143 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
144 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
150 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
151 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
157 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
158 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c197 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
198 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
204 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
205 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
211 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
212 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
218 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
219 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
225 const int32x2_t vsum2x01 = vpadd_s32(vpsum2x0, vpsum2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
226 const int32x2_t vsum2x23 = vpadd_s32(vpsum2x2, vpsum2x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D3x16c8-minmax-neon-mlal-padal.c460 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
461 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
467 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
468 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
474 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
475 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
481 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
482 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
488 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
489 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D4x16c8-minmax-neon-mlal-padal.c577 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
578 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
584 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
585 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
591 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
592 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
598 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
599 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
605 const int32x2_t vsum1x01 = vpadd_s32(vpsum1x0, vpsum1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
606 const int32x2_t vsum1x23 = vpadd_s32(vpsum1x2, vpsum1x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D1x16c8-minmax-neon-mull-padal.c137 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
138 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
144 const int32x2_t vsum0x45 = vpadd_s32(vpsum0x4, vpsum0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
145 const int32x2_t vsum0x67 = vpadd_s32(vpsum0x6, vpsum0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
151 const int32x2_t vsum0x89 = vpadd_s32(vpsum0x8, vpsum0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
152 const int32x2_t vsum0xAB = vpadd_s32(vpsum0xA, vpsum0xB); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
158 const int32x2_t vsum0xCD = vpadd_s32(vpsum0xC, vpsum0xD); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
159 const int32x2_t vsum0xEF = vpadd_s32(vpsum0xE, vpsum0xF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()

123