/external/XNNPACK/src/qs8-gemm/gen/
4x8c4-minmax-rndnu-neon-mull-ld2r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   117  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   154  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   217  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   231  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   253  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mull-dup.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   117  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   154  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   217  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   231  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   253  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mull-ld1r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   121  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   158  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   221  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   235  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   257  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4s2-minmax-rndnu-neon-mlal.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   124  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   184  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   244  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   280  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   307  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   329  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4s2-minmax-rndnu-neon-mull.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   110  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   146  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   174  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   196  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-ld2r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   134  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   195  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   261  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   298  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   361  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   375  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   397  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-dup.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   134  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   195  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   261  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   298  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   361  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   375  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   397  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-ld1r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()):
    75  int32x4_t vacc3x23 = vacc0x23;  (local)
   142  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   203  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   273  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   310  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   373  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   387  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   409  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-ld2r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()):
    87  int32x4_t vacc3x23 = vacc0x23;  (local)
   141  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   210  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   333  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   361  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   403  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-dup.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()):
    87  int32x4_t vacc3x23 = vacc0x23;  (local)
   141  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   210  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   333  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   361  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   403  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-ld1r.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()):
    87  int32x4_t vacc3x23 = vacc0x23;  (local)
   145  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   214  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   337  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   365  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   407  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4s2-minmax-rndnu-neon-mull.c (all in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()):
    87  int32x4_t vacc3x23 = vacc0x23;  (local)
   134  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   202  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   268  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   310  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

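Every kernel listed above builds vacc3x23 the same way: vmull_s8 widens eight int8 products to int16, and vpadalq_s16 pairwise-adds adjacent products into the four int32 accumulator lanes, so each of the two output columns (columns 2 and 3 of row 3) occupies two lanes. Below is a minimal sketch of that step with hypothetical names, not the generated source, which interleaves many such accumulators.

#include <arm_neon.h>

// Accumulate one 4-element slice of row 3 against output columns 2 and 3.
// va3:  the 4 activation bytes of row 3, duplicated into both halves.
// vb23: 4 weight bytes for column 2 followed by 4 for column 3.
static int32x4_t acc_row3_cols23(int32x4_t vacc3x23, int8x8_t va3, int8x8_t vb23) {
  // Widening multiply: lanes 0-3 hold column-2 products, lanes 4-7 column-3.
  const int16x8_t vprod3x23 = vmull_s8(vb23, va3);
  // Pairwise add-accumulate long: adjacent int16 pairs fold into int32 lanes,
  // leaving column 2 in lanes 0-1 and column 3 in lanes 2-3 of the accumulator.
  return vpadalq_s16(vacc3x23, vprod3x23);
}
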
/external/XNNPACK/src/qs8-igemm/gen/
4x8c4s2-minmax-rndnu-neon-mlal.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   141  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   201  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   261  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   297  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   327  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   349  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4s2-minmax-rndnu-neon-mull.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   127  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   163  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   194  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   216  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-ld1r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   159  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   220  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   290  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   327  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   390  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   406  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   428  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-dup.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   151  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   212  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   278  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   315  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   378  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   394  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   416  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mlal-ld2r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   151  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   212  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   278  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   315  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   378  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   394  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   416  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mull-ld2r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   134  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   171  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   234  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   250  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   272  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mull-dup.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   134  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   171  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   234  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   250  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   272  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x8c4-minmax-rndnu-neon-mull-ld1r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()):
    72  int32x4_t vacc3x23 = vacc0x23;  (local)
   138  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   175  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   238  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   254  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   276  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-ld2r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()):
    84  int32x4_t vacc3x23 = vacc0x23;  (local)
   158  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   227  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   350  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   380  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   422  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-ld1r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()):
    84  int32x4_t vacc3x23 = vacc0x23;  (local)
   162  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   231  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   354  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   384  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   426  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mull-dup.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()):
    84  int32x4_t vacc3x23 = vacc0x23;  (local)
   158  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   227  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   350  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   380  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   422  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4s2-minmax-rndnu-neon-mull.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()):
    84  int32x4_t vacc3x23 = vacc0x23;  (local)
   151  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   219  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   288  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   330  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

4x16c4-minmax-rndnu-neon-mlal-ld1r.c (all in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()):
    84  int32x4_t vacc3x23 = vacc0x23;  (local)
   183  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   296  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   426  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   495  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c1);
   618  vacc3x23 = vpadalq_s16(vacc3x23, vprod3x23c0);
   648  int32x4_t vacc3x0123 = vpaddq_s32(vacc3x01, vacc3x23);
   690  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));

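At the tail of every kernel above, the two-lanes-per-column accumulators are collapsed to one lane per column: a single vpaddq_s32 merges vacc3x01 and vacc3x23 into vacc3x0123, while the vsum3x23 lines belong to the 32-bit NEON path, where the 128-bit vpaddq_s32 is unavailable and each half is reduced with vpadd_s32 instead. A sketch of both paths, under the same hypothetical naming as the previous snippet:

#include <arm_neon.h>

// Reduce row 3: turn {col0,col1} and {col2,col3}, each spread across two
// int32 lanes, into one int32 lane per output column.
static int32x4_t reduce_row3(int32x4_t vacc3x01, int32x4_t vacc3x23) {
#if defined(__aarch64__)
  // AArch64 only: pairwise add across both inputs -> {col0, col1, col2, col3}.
  return vpaddq_s32(vacc3x01, vacc3x23);
#else
  // ARMv7 NEON fallback: pairwise-add each 64-bit half, then recombine,
  // matching the vsum3x23 = vpadd_s32(...) lines in the listing.
  const int32x2_t vsum3x01 = vpadd_s32(vget_low_s32(vacc3x01), vget_high_s32(vacc3x01));
  const int32x2_t vsum3x23 = vpadd_s32(vget_low_s32(vacc3x23), vget_high_s32(vacc3x23));
  return vcombine_s32(vsum3x01, vsum3x23);
#endif
}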