Home
last modified time | relevance | path

Searched refs:vacc2x6 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-neon-mlal-padal.c78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
169 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
238 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
262 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
307 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
137 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
161 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
206 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c78 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
159 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
185 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
230 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c84 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
165 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
192 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
243 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c84 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
205 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
292 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
319 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
370 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c84 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
194 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
224 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
275 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c94 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
161 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
249 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
332 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c94 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
201 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
350 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
438 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
521 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c94 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
191 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
297 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
380 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c100 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
197 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
304 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
399 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mlal-padal.c100 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
245 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
436 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
543 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
638 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c100 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
234 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
368 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
463 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-neon-mull-padal.c77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
152 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
179 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
224 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
184 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
253 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
280 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
325 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c77 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
174 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
203 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
248 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c81 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
182 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
212 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
263 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D4x8c8-minmax-neon-mlal-padal.c81 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
222 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
309 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
339 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
390 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c81 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
211 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
244 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
295 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c93 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
176 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
267 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
350 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c93 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
216 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
365 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
456 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
539 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c93 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
206 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
315 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
398 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c97 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
214 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
324 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
419 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mlal-padal.c97 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
262 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
453 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
563 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
658 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c97 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
251 vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
388 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
483 const int32x2_t vpsum2x6 = vadd_s32(vget_low_s32(vacc2x6), vget_high_s32(vacc2x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()