
Searched for refs:vacc2x4 (results 1 – 24 of 24), sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal):
   76: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  149: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  224: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  261: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  305: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x8c8-minmax-neon-mull-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal):
   76: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  123: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  160: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  204: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x8c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal):
   76: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  141: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  184: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  228: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c8-minmax-neon-mull-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal):
   82: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  147: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  191: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  241: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal):
   82: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  179: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  274: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  318: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  368: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal):
   82: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  170: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  223: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  273: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c8-minmax-neon-mull-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal):
   92: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  147: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  248: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  330: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal):
   92: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  181: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  336: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  437: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  519: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal):
   92: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  173: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  296: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  378: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c8-minmax-neon-mull-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal):
   98: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  179: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  303: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  397: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal):
   98: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  219: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  418: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  542: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  636: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal):
   98: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  210: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  367: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  461: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));
/external/XNNPACK/src/qs8-igemm/gen/
3x8c8-minmax-neon-mull-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal):
   75: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  138: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  178: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  222: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x8c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal):
   75: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  164: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  239: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  279: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  323: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x8c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal):
   75: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  156: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  202: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  246: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c8-minmax-neon-mull-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal):
   79: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  164: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  211: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  261: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal):
   79: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  196: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  291: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  338: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  388: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x8c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal):
   79: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  187: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  243: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  293: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c8-minmax-neon-mull-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal):
   91: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  162: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  266: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  348: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal):
   91: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  196: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  351: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  455: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  537: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

3x16c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal):
   91: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  188: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  314: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  396: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c8-minmax-neon-mull-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal):
   95: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  196: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  323: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  417: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c8-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal):
   95: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  236: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  435: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  562: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  656: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));

4x16c16-minmax-neon-mlal-padal.c (matches in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal):
   95: int32x4_t vacc2x4 = vacc0x4;  (local declaration)
  227: vacc2x4 = vpadalq_s16(vacc2x4, vprod2x4);
  387: const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
  481: const int32x2_t vpsum2x4 = vadd_s32(vget_low_s32(vacc2x4), vget_high_s32(vacc2x4));
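
Read together, the 24 files show the same lifecycle for vacc2x4 in every kernel: it is declared by copying row 0's accumulator (int32x4_t vacc2x4 = vacc0x4;), it gathers widened int16 products into its four int32 lanes via vpadalq_s16 (the c8 mlal variants hit this twice, consistent with a main loop plus an 8-element remainder path), it is summed pairwise with the neighboring column accumulator vacc2x5 via vpaddq_s32, and its low and high halves are folded with vadd_s32. The vpaddq_s32 and vadd_s32 reductions are presumably the two sides of an architecture guard, since vpaddq_s32 exists only on AArch64. Below is a minimal sketch of that widen, pairwise-accumulate, and reduce idiom in isolation; it is not XNNPACK code, and the helper name qs8_dot, its signature, and the k % 8 == 0 requirement are all invented for this illustration.

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper, not taken from XNNPACK: one int8 dot product
     * computed with the widen-multiply / pairwise-accumulate / horizontal-
     * reduce idiom shared by the matches above. Assumes k % 8 == 0. */
    static int32_t qs8_dot(const int8_t* a, const int8_t* b, size_t k, int32_t bias) {
      int32x4_t vacc = vdupq_n_s32(0);             /* int32 lane sums, like vacc2x4 */
      for (size_t i = 0; i < k; i += 8) {
        const int8x8_t va = vld1_s8(a + i);        /* 8 signed 8-bit inputs */
        const int8x8_t vb = vld1_s8(b + i);
        const int16x8_t vprod = vmull_s8(va, vb);  /* widen to 8 int16 products */
        vacc = vpadalq_s16(vacc, vprod);           /* add adjacent pairs into int32 lanes */
      }
      /* Horizontal reduction, mirroring vadd_s32(vget_low_s32(vacc2x4),
       * vget_high_s32(vacc2x4)) in the listed kernels. */
      const int32x2_t vsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
      return bias + vget_lane_s32(vpadd_s32(vsum, vsum), 0);
    }

The design point the kernels share with this sketch: keeping the accumulator as a full int32x4_t for the whole K loop defers all cross-lane work to a single reduction at the end, so the hot loop contains only element-wise multiplies and pairwise accumulates.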