Home
last modified time | relevance | path

Searched refs:int32x2_t (Results 1 – 25 of 193) sorted by relevance

12345678

/external/llvm-project/clang/test/CodeGen/
Darm-neon-range-checks.c6 void test_vdot_lane(int32x2_t r, int8x8_t a, int8x8_t b) { in test_vdot_lane()
21 void test_vdot_laneq(int32x2_t r, int8x8_t a, int8x16_t b) { in test_vdot_laneq()
36 void test_vdup_lane(int32x2_t v) { in test_vdup_lane()
43 void test_vdupq_lane(int32x2_t v) { in test_vdupq_lane()
66 void test_vmla_lane(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmla_lane()
73 void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane()
81 void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq()
95 void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlal_high_lane()
110 void test_vmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane()
118 void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq()
[all …]
Darm-v8.1a-neon-intrinsics.c24 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_s32()
66 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_lane_s32()
90 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlahq_lane_s32()
112 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_s32()
154 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_lane_s32()
178 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlshq_lane_s32()
Daarch64-neon-2velem.c43 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmla_lane_s32()
56 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32()
95 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32()
147 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmls_lane_s32()
160 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32()
199 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32()
248 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { in test_vmul_lane_s32()
260 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32()
344 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32()
675 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_s32()
[all …]
Daarch64-v8.6a-neon-intrinsics.c34 int32x2_t test_vusdot_s32(int32x2_t r, uint8x8_t a, int8x8_t b) { in test_vusdot_s32()
47 int32x2_t test_vusdot_lane_s32(int32x2_t r, uint8x8_t a, int8x8_t b) { in test_vusdot_lane_s32()
60 int32x2_t test_vsudot_lane_s32(int32x2_t r, int8x8_t a, uint8x8_t b) { in test_vsudot_lane_s32()
73 int32x2_t test_vusdot_laneq_s32(int32x2_t r, uint8x8_t a, int8x16_t b) { in test_vusdot_laneq_s32()
86 int32x2_t test_vsudot_laneq_s32(int32x2_t r, int8x8_t a, uint8x16_t b) { in test_vsudot_laneq_s32()
/external/clang/test/CodeGen/
Darm-v8.1a-neon-intrinsics.c24 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_s32()
66 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_lane_s32()
90 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlahq_lane_s32()
112 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_s32()
154 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_lane_s32()
178 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlshq_lane_s32()
Daarch64-neon-2velem.c30 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmla_lane_s32()
39 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32()
66 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32()
102 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmls_lane_s32()
111 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32()
138 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32()
171 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { in test_vmul_lane_s32()
179 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32()
235 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32()
518 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_s32()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D4x16c8-minmax-neon-mull-padal.c354 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
355 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
356 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
357 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
358 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
359 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
361 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
362 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
363 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
364 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c285 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
286 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
287 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
288 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
289 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
290 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
292 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
293 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
294 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
295 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c226 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
227 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
228 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
229 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
230 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
231 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
233 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
234 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
235 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
236 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c216 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
217 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
218 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
219 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
220 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
221 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
223 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
224 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
225 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
226 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c418 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
419 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
420 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
421 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
422 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
423 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
425 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
426 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
427 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
428 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c333 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
334 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
335 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
336 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
337 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
338 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
340 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
341 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
342 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
343 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c187 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
188 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
189 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
190 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
191 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
192 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
194 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
195 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
196 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
197 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c258 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
259 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
260 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
261 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
262 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
263 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
265 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
266 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
267 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
268 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x16c8-minmax-neon-mull-padal.c334 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
335 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
336 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
337 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
338 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
339 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
341 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
342 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
343 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
344 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c267 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
268 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
269 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
270 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
271 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
272 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
274 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
275 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
276 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
277 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c206 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
207 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
208 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
209 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
210 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
211 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
213 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
214 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
215 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
216 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c200 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
201 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
202 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
203 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
204 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
205 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
207 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
208 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
209 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
210 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c398 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
399 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
400 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
401 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
402 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
403 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
405 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
406 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
407 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
408 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c315 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
316 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
317 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
318 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
319 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
320 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
322 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
323 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
324 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
325 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c169 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
170 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
171 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
172 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
173 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
174 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
176 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
177 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
178 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
179 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c232 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
233 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
234 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
235 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
236 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
237 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
239 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
240 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
241 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
242 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c238 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
239 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
240 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
241 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
242 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
243 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
245 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
246 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
247 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
248 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D2x8c8-minmax-neon-mull-padal.c132 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
133 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
134 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
135 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
136 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
137 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
139 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
140 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
141 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
142 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c193 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
194 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
195 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
196 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
197 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
198 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
200 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
201 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
202 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
203 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]

12345678