/external/clang/test/CodeGen/ |
D | arm-v8.1a-neon-intrinsics.c | 24 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_s32() 66 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlah_lane_s32() 90 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlahq_lane_s32() 112 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_s32() 154 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { in test_vqrdmlsh_lane_s32() 178 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { in test_vqrdmlshq_lane_s32()
|
D | aarch64-neon-2velem.c | 30 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmla_lane_s32() 39 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32() 66 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32() 102 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { in test_vmls_lane_s32() 111 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32() 138 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32() 171 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { in test_vmul_lane_s32() 179 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32() 235 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32() 518 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_s32() [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-rndnu-neon-mull.c | 335 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 336 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 337 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 338 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 339 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 340 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 342 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 343 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 344 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 345 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() [all …]
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 268 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 269 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 270 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 271 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 272 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 273 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 275 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 276 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 277 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 278 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() [all …]
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 398 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 399 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 400 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 401 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 402 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 403 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 405 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 406 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 407 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 408 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() [all …]
|
D | 4x8c8-minmax-rndnu-neon-mull.c | 207 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 208 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 209 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 210 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 211 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 212 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 214 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 215 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 216 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 217 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() [all …]
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 201 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 202 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 203 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 204 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 205 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 206 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 208 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 209 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 210 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 211 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() [all …]
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 315 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 316 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 317 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 318 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 319 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 320 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 322 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 323 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 324 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 325 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() [all …]
|
D | 3x8c8-minmax-rndnu-neon-mull.c | 170 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 171 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 172 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 173 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 174 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 175 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 177 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 178 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 179 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 180 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() [all …]
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 232 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 233 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 234 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 235 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 236 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 237 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 239 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 240 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 241 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 242 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() [all …]
|
D | 4x8c16-minmax-rndnu-neon-mlal.c | 238 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 239 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 240 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 241 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 242 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 243 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 245 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 246 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 247 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 248 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() [all …]
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 574 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 575 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 576 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 577 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 578 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 579 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 581 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 582 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 583 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 584 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() [all …]
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 457 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 458 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 459 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 460 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 461 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 462 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 464 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 465 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 466 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 467 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() [all …]
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-rndnu-neon-mull.c | 355 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 356 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 357 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 358 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 359 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 360 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 362 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 363 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 364 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() 365 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() [all …]
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 286 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 287 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 288 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 289 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 290 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 291 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 293 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 294 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 295 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() 296 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() [all …]
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 418 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 419 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 420 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 421 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 422 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 423 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 425 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 426 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 427 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 428 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() [all …]
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 217 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 218 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 219 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 220 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 221 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 222 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 224 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 225 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 226 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() 227 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() [all …]
|
D | 4x8c8-minmax-rndnu-neon-mull.c | 227 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 228 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 229 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 230 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 231 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 232 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 234 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 235 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 236 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() 237 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() [all …]
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 333 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 334 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 335 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 336 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 337 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 338 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 340 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 341 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 342 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 343 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() [all …]
|
D | 3x8c8-minmax-rndnu-neon-mull.c | 188 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 189 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 190 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 191 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 192 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 193 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 195 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 196 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 197 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() 198 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() [all …]
|
D | 4x8c16-minmax-rndnu-neon-mlal.c | 258 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 259 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 260 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 261 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 262 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 263 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 265 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 266 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 267 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 268 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() [all …]
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 248 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 249 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 250 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 251 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 252 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 253 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 255 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 256 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 257 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 258 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() [all …]
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 594 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 595 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 596 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 597 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 598 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 599 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 601 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 602 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 603 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 604 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() [all …]
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 475 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 476 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 477 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 478 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 479 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 480 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 482 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 483 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 484 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 485 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() [all …]
|
D | 3x8c16-minmax-rndnu-neon-mlal.c | 211 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 212 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 213 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 214 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 215 const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 216 const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 218 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 219 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 220 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 221 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() [all …]
|